1
1
mirror of https://github.com/github/semantic.git synced 2024-12-24 23:42:31 +03:00

Merge branch 'master' into what-the-hspec

This commit is contained in:
Rob Rix 2017-04-08 10:30:22 -04:00 committed by GitHub
commit 5349354910
37 changed files with 576 additions and 483 deletions

View File

@ -3,4 +3,4 @@ module Text.Parser.TreeSitter.C where
import Text.Parser.TreeSitter
import Foreign.Ptr
foreign import ccall "vendor/tree-sitter-c/src/parser.c tree_sitter_c" tree_sitter_c :: Ptr Language
foreign import ccall unsafe "vendor/tree-sitter-c/src/parser.c tree_sitter_c" tree_sitter_c :: Ptr Language

View File

@ -3,4 +3,4 @@ module Text.Parser.TreeSitter.Go where
import Text.Parser.TreeSitter
import Foreign.Ptr
foreign import ccall "vendor/tree-sitter-go/src/parser.c tree_sitter_go" tree_sitter_go :: Ptr Language
foreign import ccall unsafe "vendor/tree-sitter-go/src/parser.c tree_sitter_go" tree_sitter_go :: Ptr Language

View File

@ -3,4 +3,4 @@ module Text.Parser.TreeSitter.JavaScript where
import Text.Parser.TreeSitter
import Foreign.Ptr
foreign import ccall "vendor/tree-sitter-javascript/src/parser.c tree_sitter_javascript" tree_sitter_javascript :: Ptr Language
foreign import ccall unsafe "vendor/tree-sitter-javascript/src/parser.c tree_sitter_javascript" tree_sitter_javascript :: Ptr Language

View File

@ -3,4 +3,4 @@ module Text.Parser.TreeSitter.Ruby where
import Text.Parser.TreeSitter
import Foreign.Ptr
foreign import ccall "vendor/tree-sitter-ruby/src/parser.c tree_sitter_ruby" tree_sitter_ruby :: Ptr Language
foreign import ccall unsafe "vendor/tree-sitter-ruby/src/parser.c tree_sitter_ruby" tree_sitter_ruby :: Ptr Language

View File

@ -3,4 +3,4 @@ module Text.Parser.TreeSitter.TypeScript where
import Foreign.Ptr
import Text.Parser.TreeSitter
foreign import ccall "vendor/tree-sitter-typescript/src/parser.c tree_sitter_typescript" tree_sitter_typescript :: Ptr Language
foreign import ccall unsafe "vendor/tree-sitter-typescript/src/parser.c tree_sitter_typescript" tree_sitter_typescript :: Ptr Language

View File

@ -17,6 +17,8 @@ library
, Alignment
, Arguments
, Category
, Command
, Command.Parse
, Data.Align.Generic
, Data.Functor.Both
, Data.Functor.Classes.Eq.Generic
@ -27,7 +29,6 @@ library
, Data.Record
, Data.Text.Listable
, Diff
, DiffCommand
, Info
, Interpreter
, Language
@ -37,7 +38,6 @@ library
, Language.Markdown
, Language.Go
, Language.Ruby
, ParseCommand
, Parser
, Patch
, Paths_semantic_diff
@ -90,11 +90,13 @@ library
, MonadRandom
, mtl
, optparse-applicative
, parallel
, pointed
, protolude
, recursion-schemes
, regex-compat
, semigroups
, split
, text >= 1.2.1.3
, text-icu
, these
@ -147,6 +149,9 @@ test-suite test
hs-source-dirs: test
main-is: Spec.hs
other-modules: AlignmentSpec
, Command.Spec
, Command.Diff.Spec
, Command.Parse.Spec
, Data.Mergeable.Spec
, Data.RandomWalkSimilarity.Spec
, DiffSpec
@ -160,8 +165,6 @@ test-suite test
, TermSpec
, TOCSpec
, IntegrationSpec
, DiffCommandSpec
, ParseCommandSpec
, Test.Hspec.LeanCheck
build-depends: aeson
, array

View File

@ -3,7 +3,7 @@ module Arguments (Arguments(..), CmdLineOptions(..), DiffMode(..), ExtraArg(..),
import Data.Functor.Both
import Data.Maybe
import Data.Text
import Data.List.Split
import Prologue hiding ((<>))
import Prelude
import System.Environment
@ -40,7 +40,7 @@ data CmdLineOptions = CmdLineOptions
-- | Arguments for the program (includes command line, environment, and defaults).
data Arguments = Arguments
{ gitDir :: FilePath
, alternateObjectDirs :: [Text]
, alternateObjectDirs :: [FilePath]
, format :: R.Format
, timeoutInMicroseconds :: Int
, outputPath :: Maybe FilePath
@ -59,7 +59,7 @@ programArguments CmdLineOptions{..} = do
gitDir <- fromMaybe pwd <$> lookupEnv "GIT_DIR"
eitherObjectDirs <- try $ parseObjectDirs . toS <$> getEnv "GIT_ALTERNATE_OBJECT_DIRECTORIES"
outputPath <- getOutputPath outputFilePath
let alternateObjectDirs = case (eitherObjectDirs :: Either IOError [Text]) of
let alternateObjectDirs = case (eitherObjectDirs :: Either IOError [FilePath]) of
(Left _) -> []
(Right objectDirs) -> objectDirs
@ -149,5 +149,5 @@ defaultTimeout = 7 * 1000000
toMicroseconds :: Float -> Int
toMicroseconds num = floor $ num * 1000000
parseObjectDirs :: Text -> [Text]
parseObjectDirs = split (== ':')
parseObjectDirs :: FilePath -> [FilePath]
parseObjectDirs = splitWhen (== ':')

192
src/Command.hs Normal file
View File

@ -0,0 +1,192 @@
{-# LANGUAGE DataKinds, GADTs #-}
module Command
( Command
-- Constructors
, readFile
, readFilesAtSHAs
, parse
, parseBlob
, diff
, maybeDiff
, renderDiffs
, concurrently
-- Evaluation
, runCommand
) where
import Command.Parse
import qualified Control.Concurrent.Async.Pool as Async
import Control.Exception (catch)
import Control.Monad.Free.Freer
import Control.Monad.IO.Class
import Control.Parallel.Strategies
import qualified Data.ByteString as B
import Data.Functor.Both
import Data.List ((\\), nub)
import Data.RandomWalkSimilarity
import Data.Record
import Data.String
import Diff
import Info
import Interpreter
import GHC.Conc (numCapabilities)
import qualified Git
import Git.Blob
import Git.Libgit2
import Git.Libgit2.Backend
import Git.Repository
import Git.Types
import GitmonClient
import Language
import Patch
import Prologue hiding (concurrently, Concurrently, readFile)
import Renderer
import Source
import Syntax
import System.FilePath
import Term
-- | High-level commands encapsulating the work done to perform a diff or parse operation.
type Command = Freer CommandF
-- Constructors
-- | Read a regular file into a SourceBlob.
readFile :: FilePath -> Command (Maybe SourceBlob)
readFile path = ReadFile path `Then` return
-- | Read a list of files at the states corresponding to the given shas.
readFilesAtSHAs
:: FilePath -- ^ GIT_DIR
-> [FilePath] -- ^ GIT_ALTERNATE_OBJECT_DIRECTORIES
-> [FilePath] -- ^ Specific paths to diff. If empty, diff all changed paths.
-> Both String -- ^ The commit shas for the before & after states.
-> Command [(FilePath, Both (Maybe SourceBlob))] -- ^ A command producing a list of pairs of blobs for the specified files (or all files if none were specified).
readFilesAtSHAs gitDir alternateObjectDirs paths shas = ReadFilesAtSHAs gitDir alternateObjectDirs paths shas `Then` return
-- | Parse a blob in a given language.
parse :: Maybe Language -> SourceBlob -> Command (Term (Syntax Text) (Record DefaultFields))
parse language blob = Parse language blob `Then` return
-- | Parse a blob in the language selected for its file extension.
parseBlob :: SourceBlob -> Command (Term (Syntax Text) (Record DefaultFields))
parseBlob blob = parse (languageForType (takeExtension (path blob))) blob
-- | Diff two terms.
diff :: HasField fields Category => Both (Term (Syntax Text) (Record fields)) -> Command (Diff (Syntax Text) (Record fields))
diff terms = Diff (terms `using` parTraversable rpar) `Then` return
-- | Diff two terms, producing an insertion/deletion when one is missing and Nothing when both are missing.
maybeDiff :: HasField fields Category => Both (Maybe (Term (Syntax Text) (Record fields))) -> Command (Maybe (Diff (Syntax Text) (Record fields)))
maybeDiff terms = case runJoin terms of
(Just term1, Nothing) -> return (Just (pure (Delete term1)))
(Nothing, Just term2) -> return (Just (pure (Insert term2)))
(Just term1, Just term2) -> Just <$> diff (both term1 term2)
(Nothing, Nothing) -> return Nothing
-- | Render a diff using the specified renderer.
renderDiffs :: (NFData (Record fields), Monoid output) => DiffRenderer fields output -> [(Both SourceBlob, Diff (Syntax Text) (Record fields))] -> Command output
renderDiffs renderer diffs = RenderDiffs renderer (diffs `using` parTraversable (parTuple2 r0 rdeepseq)) `Then` return
-- | Run a function over each element of a Traversable concurrently.
concurrently :: Traversable t => t a -> (a -> Command b) -> Command (t b)
concurrently ts f = Concurrently ts f `Then` return
-- Evaluation
-- | Run the passed command and return its results in IO.
runCommand :: Command a -> IO a
runCommand = iterFreerA $ \ command yield -> case command of
ReadFile path -> runReadFile path >>= yield
ReadFilesAtSHAs gitDir alternateObjectDirs paths shas -> runReadFilesAtSHAs gitDir alternateObjectDirs paths shas >>= yield
Parse language blob -> runParse language blob >>= yield
Diff terms -> yield (runDiff terms)
RenderDiffs renderer diffs -> yield (runRenderDiffs renderer diffs)
Concurrently ts f -> do
results <- Async.withTaskGroup numCapabilities $ \ group -> Async.runTask group $ traverse (Async.task . runCommand . f) ts
yield results
LiftIO io -> io >>= yield
-- Implementation details
data CommandF f where
ReadFile :: FilePath -> CommandF (Maybe SourceBlob)
ReadFilesAtSHAs :: FilePath -> [FilePath] -> [FilePath] -> Both String -> CommandF [(FilePath, Both (Maybe SourceBlob))]
Parse :: Maybe Language -> SourceBlob -> CommandF (Term (Syntax Text) (Record DefaultFields))
Diff :: HasField fields Category => Both (Term (Syntax Text) (Record fields)) -> CommandF (Diff (Syntax Text) (Record fields))
RenderDiffs :: Monoid output => DiffRenderer fields output -> [(Both SourceBlob, Diff (Syntax Text) (Record fields))] -> CommandF output
Concurrently :: Traversable t => t a -> (a -> Command b) -> CommandF (t b)
LiftIO :: IO a -> CommandF a
runReadFile :: FilePath -> IO (Maybe SourceBlob)
runReadFile path = do
raw <- (Just <$> B.readFile path) `catch` (const (return Nothing) :: IOException -> IO (Maybe ByteString))
source <- traverse transcode raw
return (flip sourceBlob path <$> source)
runReadFilesAtSHAs :: FilePath -> [FilePath] -> [FilePath] -> Both String -> IO [(FilePath, Both (Maybe SourceBlob))]
runReadFilesAtSHAs gitDir alternateObjectDirs paths shas = do
paths <- case paths of
[] -> runGit $ do
trees <- for shas treeForSha
paths <- for trees (reportGitmon "ls-tree" . treeBlobEntries)
pure . nub $! (\ (p, _, _) -> toS p) <$> runBothWith (\\) paths <> runBothWith (flip (\\)) paths
_ -> pure paths
Async.withTaskGroup numCapabilities (\ group -> Async.runTask group (traverse (Async.task . runGit . blobsForPath) paths))
where treeForSha sha = do
obj <- parseObjOid (toS sha)
commit <- reportGitmon "cat-file" $ lookupCommit obj
reportGitmon "cat-file" $ lookupTree (commitTree commit)
blobForPathInTree path tree = do
entry <- reportGitmon "ls-tree" $ treeEntry tree (toS path)
case entry of
Just (BlobEntry entryOid entryKind) -> do
blob <- reportGitmon "cat-file" $ lookupBlob entryOid
contents <- blobToByteString blob
transcoded <- liftIO $ transcode contents
let oid = renderObjOid $ blobOid blob
pure (Just (SourceBlob transcoded (toS oid) path (Just (toSourceKind entryKind))))
_ -> pure Nothing
runGit :: ReaderT LgRepo IO a -> IO a
runGit action = withRepository lgFactory gitDir $ do
repo <- getRepository
for_ alternateObjectDirs (liftIO . odbBackendAddPath repo . toS)
action
blobsForPath path = do
trees <- traverse treeForSha shas
(,) path <$> traverse (blobForPathInTree path) trees
toSourceKind (Git.PlainBlob mode) = Source.PlainBlob mode
toSourceKind (Git.ExecutableBlob mode) = Source.ExecutableBlob mode
toSourceKind (Git.SymlinkBlob mode) = Source.SymlinkBlob mode
runParse :: Maybe Language -> SourceBlob -> IO (Term (Syntax Text) (Record DefaultFields))
runParse = maybe lineByLineParser parserForLanguage
runDiff :: HasField fields Category => Both (Term (Syntax Text) (Record fields)) -> Diff (Syntax Text) (Record fields)
runDiff terms = stripDiff (runBothWith diffTerms (fmap decorate terms))
where decorate = defaultFeatureVectorDecorator getLabel
getLabel :: HasField fields Category => TermF (Syntax Text) (Record fields) a -> (Category, Maybe Text)
getLabel (h :< t) = (Info.category h, case t of
Leaf s -> Just s
_ -> Nothing)
runRenderDiffs :: Monoid output => DiffRenderer fields output -> [(Both SourceBlob, Diff (Syntax Text) (Record fields))] -> output
runRenderDiffs = runDiffRenderer
instance MonadIO Command where
liftIO io = LiftIO io `Then` return

View File

@ -1,10 +1,13 @@
{-# LANGUAGE DataKinds, TypeOperators, ScopedTypeVariables #-}
module ParseCommand where
{-# LANGUAGE DataKinds, GADTs, GeneralizedNewtypeDeriving, ScopedTypeVariables, TypeFamilies, TypeOperators #-}
module Command.Parse where
import Arguments
import Category
import Data.Aeson (ToJSON, toJSON, encode, object, (.=))
import Data.Aeson.Types (Pair)
import Data.Functor.Foldable hiding (Nil)
import Data.Record
import Data.String
import qualified Data.Text as T
import Git.Blob
import Git.Libgit2
@ -29,17 +32,21 @@ import Text.Parser.TreeSitter.JavaScript
import Text.Parser.TreeSitter.Ruby
import Text.Parser.TreeSitter.TypeScript
data ParseTreeFile = ParseTreeFile { parseTreeFilePath :: FilePath, node :: ParseNode } deriving (Show)
data ParseTreeFile = ParseTreeFile { parseTreeFilePath :: FilePath, node :: Rose ParseNode } deriving (Show)
data Rose a = Rose a [Rose a]
deriving (Eq, Show)
instance ToJSON ParseTreeFile where
toJSON ParseTreeFile{..} = object [ "filePath" .= parseTreeFilePath, "programNode" .= node ]
toJSON ParseTreeFile{..} = object [ "filePath" .= parseTreeFilePath, "programNode" .= cata algebra node ]
where algebra (RoseF a as) = object $ parseNodeToJSONFields a <> [ "children" .= as ]
data IndexFile = IndexFile { indexFilePath :: FilePath, nodes :: [ParseNode] } deriving (Show)
instance ToJSON IndexFile where
toJSON IndexFile{..} = object [ "filePath" .= indexFilePath, "programNodes" .= nodes ]
toJSON IndexFile{..} = object [ "filePath" .= indexFilePath, "programNodes" .= foldMap (singleton . object . parseNodeToJSONFields) nodes ]
where singleton a = [a]
data ParseNode = ParseNode
{ category :: Text
@ -47,17 +54,15 @@ data ParseNode = ParseNode
, sourceText :: Maybe SourceText
, sourceSpan :: SourceSpan
, identifier :: Maybe Text
, children :: Maybe [ParseNode]
}
deriving (Show)
instance ToJSON ParseNode where
toJSON ParseNode{..} =
object
$ [ "category" .= category, "sourceRange" .= sourceRange, "sourceSpan" .= sourceSpan ]
<> [ "sourceText" .= sourceText | isJust sourceText ]
<> [ "identifier" .= identifier | isJust identifier ]
<> [ "children" .= children | isJust children ]
-- | Produce a list of JSON 'Pair's for the fields in a given ParseNode.
parseNodeToJSONFields :: ParseNode -> [Pair]
parseNodeToJSONFields ParseNode{..} =
[ "category" .= category, "sourceRange" .= sourceRange, "sourceSpan" .= sourceSpan ]
<> [ "sourceText" .= sourceText | isJust sourceText ]
<> [ "identifier" .= identifier | isJust identifier ]
-- | Parses file contents into an SExpression format for the provided arguments.
parseSExpression :: Arguments -> IO ByteString
@ -65,42 +70,33 @@ parseSExpression =
pure . printTerms TreeOnly <=< parse <=< sourceBlobsFromArgs
where parse = traverse (\sourceBlob@SourceBlob{..} -> parserForType (toS (takeExtension path)) sourceBlob)
type RAlgebra t a = Base t (t, a) -> a
parseRoot :: (FilePath -> f ParseNode -> root) -> (ParseNode -> [f ParseNode] -> f ParseNode) -> Arguments -> IO [root]
parseRoot construct combine args@Arguments{..} = do
blobs <- sourceBlobsFromArgs args
for blobs (\ sourceBlob@SourceBlob{..} -> do
parsedTerm <- parseWithDecorator (decorator source) path sourceBlob
pure $! construct path (para algebra parsedTerm))
where algebra (annotation :< syntax) = combine (makeNode annotation (Prologue.fst <$> syntax)) (toList (Prologue.snd <$> syntax))
decorator = parseDecorator debug
makeNode :: Record (Maybe SourceText ': DefaultFields) -> Syntax Text (Term (Syntax Text) (Record (Maybe SourceText ': DefaultFields))) -> ParseNode
makeNode (head :. range :. category :. sourceSpan :. Nil) syntax =
ParseNode (toS category) range head sourceSpan (identifierFor syntax)
-- | Constructs IndexFile nodes for the provided arguments and encodes them to JSON.
parseIndex :: Arguments -> IO ByteString
parseIndex args@Arguments{..} = fmap (toS . encode) $ buildParseNodes IndexFile algebra (parseDecorator debug) =<< sourceBlobsFromArgs args
where
algebra :: StringConv leaf T.Text => TermF (Syntax leaf) (Record '[(Maybe SourceText), Range, Category, SourceSpan]) (Term (Syntax leaf) (Record '[(Maybe SourceText), Range, Category, SourceSpan]), [ParseNode]) -> [ParseNode]
algebra (annotation :< syntax) = ParseNode ((toS . Info.category) annotation) (byteRange annotation) (rhead annotation) (Info.sourceSpan annotation) (identifierFor (Prologue.fst <$> syntax)) Nothing : (Prologue.snd =<< toList syntax)
parseIndex = fmap (toS . encode) . parseRoot IndexFile (\ node siblings -> node : concat siblings)
-- | Constructs ParseTreeFile nodes for the provided arguments and encodes them to JSON.
parseTree :: Arguments -> IO ByteString
parseTree args@Arguments{..} = fmap (toS . encode) $ buildParseNodes ParseTreeFile algebra (parseDecorator debug) =<< sourceBlobsFromArgs args
where
algebra :: StringConv leaf T.Text => TermF (Syntax leaf) (Record '[(Maybe SourceText), Range, Category, SourceSpan]) (Term (Syntax leaf) (Record '[(Maybe SourceText), Range, Category, SourceSpan]), ParseNode) -> ParseNode
algebra (annotation :< syntax) = ParseNode ((toS . Info.category) annotation) (byteRange annotation) (rhead annotation) (Info.sourceSpan annotation) (identifierFor (Prologue.fst <$> syntax)) (Just (Prologue.snd <$> toList syntax))
parseTree = fmap (toS . encode) . parseRoot ParseTreeFile Rose
-- | Determines the term decorator to use when parsing.
parseDecorator :: (Functor f, HasField fields Range) => Bool -> (Source -> TermDecorator f fields (Maybe SourceText))
parseDecorator True = termSourceTextDecorator
parseDecorator False = const . const Nothing
-- | Function context for constructing parse nodes given a parse node constructor, an algebra (for a paramorphism), a function that takes a file's source and returns a term decorator, and a list of source blobs.
-- This function is general over b such that b represents IndexFile or ParseTreeFile.
buildParseNodes
:: forall nodes b. (FilePath -> nodes -> b)
-> (CofreeF (Syntax Text) (Record '[Maybe SourceText, Range, Category, SourceSpan]) (Cofree (Syntax Text) (Record '[Maybe SourceText, Range, Category, SourceSpan]), nodes) -> nodes)
-> (Source -> TermDecorator (Syntax Text) '[Range, Category, SourceSpan] (Maybe SourceText))
-> [SourceBlob]
-> IO [b]
buildParseNodes programNodeConstructor algebra termDecorator sourceBlobs =
for sourceBlobs buildParseNode
where
buildParseNode :: SourceBlob -> IO b
buildParseNode sourceBlob@SourceBlob{..} = do
parsedTerm <- parseWithDecorator (termDecorator source) path sourceBlob
let parseNode = para algebra parsedTerm
pure $ programNodeConstructor path parseNode
-- | For the given absolute file paths, retrieves their source blobs.
sourceBlobsFromPaths :: [FilePath] -> IO [SourceBlob]
sourceBlobsFromPaths filePaths =
@ -109,7 +105,7 @@ sourceBlobsFromPaths filePaths =
pure $ Source.SourceBlob source mempty filePath (Just Source.defaultPlainBlob))
-- | For the given sha, git repo path, and file paths, retrieves the source blobs.
sourceBlobsFromSha :: [Char] -> [Char] -> [FilePath] -> IO [SourceBlob]
sourceBlobsFromSha :: String -> String -> [FilePath] -> IO [SourceBlob]
sourceBlobsFromSha commitSha gitDir filePaths = do
maybeBlobs <- withRepository lgFactory gitDir $ do
repo <- getRepository
@ -139,7 +135,7 @@ sourceBlobsFromSha commitSha gitDir filePaths = do
toSourceKind (Git.SymlinkBlob mode) = Source.SymlinkBlob mode
-- | Returns a Just identifier text if the given Syntax term contains an identifier (leaf) syntax. Otherwise returns Nothing.
identifierFor :: StringConv leaf T.Text => Syntax leaf (Term (Syntax leaf) (Record '[(Maybe SourceText), Range, Category, SourceSpan])) -> Maybe T.Text
identifierFor :: (HasField fields (Maybe SourceText), HasField fields Category, StringConv leaf Text) => Syntax leaf (Term (Syntax leaf) (Record fields)) -> Maybe Text
identifierFor = fmap toS . extractLeafValue . unwrap <=< maybeIdentifier
-- | For the file paths and commit sha provided, extract only the BlobEntries and represent them as SourceBlobs.
@ -150,34 +146,42 @@ sourceBlobsFromArgs Arguments{..} =
_ -> sourceBlobsFromPaths filePaths
-- | Return a parser incorporating the provided TermDecorator.
parseWithDecorator :: TermDecorator (Syntax Text) '[Range, Category, SourceSpan] field -> FilePath -> Parser (Syntax Text) (Record '[field, Range, Category, SourceSpan])
parseWithDecorator :: TermDecorator (Syntax Text) DefaultFields field -> FilePath -> Parser (Syntax Text) (Record (field ': DefaultFields))
parseWithDecorator decorator path blob = decorateTerm decorator <$> parserForType (toS (takeExtension path)) blob
-- | Return a parser based on the file extension (including the ".").
parserForType :: Text -> Parser (Syntax Text) (Record '[Range, Category, SourceSpan])
parserForType mediaType = case languageForType mediaType of
Just C -> treeSitterParser C tree_sitter_c
Just JavaScript -> treeSitterParser JavaScript tree_sitter_javascript
Just TypeScript -> treeSitterParser TypeScript tree_sitter_typescript
Just Markdown -> cmarkParser
Just Ruby -> treeSitterParser Ruby tree_sitter_ruby
Just Language.Go -> treeSitterParser Language.Go tree_sitter_go
_ -> lineByLineParser
parserForType :: String -> Parser (Syntax Text) (Record DefaultFields)
parserForType mediaType = maybe lineByLineParser parserForLanguage (languageForType mediaType)
-- | Select a parser for a given Language.
parserForLanguage :: Language -> Parser (Syntax Text) (Record DefaultFields)
parserForLanguage language = case language of
C -> treeSitterParser C tree_sitter_c
JavaScript -> treeSitterParser JavaScript tree_sitter_javascript
TypeScript -> treeSitterParser TypeScript tree_sitter_typescript
Markdown -> cmarkParser
Ruby -> treeSitterParser Ruby tree_sitter_ruby
Language.Go -> treeSitterParser Language.Go tree_sitter_go
-- | Decorate a 'Term' using a function to compute the annotation values at every node.
decorateTerm :: (Functor f) => TermDecorator f fields field -> Term f (Record fields) -> Term f (Record (field ': fields))
decorateTerm decorator = cata $ \ term -> cofree ((decorator (extract <$> term) :. headF term) :< tailF term)
decorateTerm decorator = cata $ \ term -> cofree ((decorator term :. headF term) :< tailF term)
-- | A function computing a value to decorate terms with. This can be used to cache synthesized attributes on terms.
type TermDecorator f fields field = TermF f (Record fields) (Record (field ': fields)) -> field
type TermDecorator f fields field = TermF f (Record fields) (Term f (Record (field ': fields))) -> field
-- | Term decorator extracting the source text for a term.
termSourceTextDecorator :: (Functor f, HasField fields Range) => Source -> TermDecorator f fields (Maybe SourceText)
termSourceTextDecorator source term = Just . SourceText . toText $ Source.slice range' source
where range' = byteRange $ headF term
termSourceTextDecorator source (ann :< _) = Just (SourceText (toText (Source.slice (byteRange ann) source)))
newtype Identifier = Identifier Text
deriving (Eq, Show, ToJSON)
identifierDecorator :: (HasField fields Category, StringConv leaf Text) => TermDecorator (Syntax leaf) fields (Maybe Identifier)
identifierDecorator = fmap (Command.Parse.Identifier . toS) . extractLeafValue . unwrap <=< maybeIdentifier . tailF
-- | A fallback parser that treats a file simply as rows of strings.
lineByLineParser :: Parser (Syntax Text) (Record '[Range, Category, SourceSpan])
lineByLineParser :: Parser (Syntax Text) (Record DefaultFields)
lineByLineParser SourceBlob{..} = pure . cofree . root $ case foldl' annotateLeaves ([], 0) lines of
(leaves, _) -> cofree <$> leaves
where
@ -189,5 +193,17 @@ lineByLineParser SourceBlob{..} = pure . cofree . root $ case foldl' annotateLea
(accum <> [ leaf charIndex (Source.toText line) ] , charIndex + Source.length line)
-- | Return the parser that should be used for a given path.
parserForFilepath :: FilePath -> Parser (Syntax Text) (Record '[Range, Category, SourceSpan])
parserForFilepath :: FilePath -> Parser (Syntax Text) (Record DefaultFields)
parserForFilepath = parserForType . toS . takeExtension
data RoseF a b = RoseF a [b]
deriving (Eq, Functor, Show)
type instance Base (Rose a) = RoseF a
instance Recursive (Rose a) where
project (Rose a tree) = RoseF a tree
instance Corecursive (Rose a) where
embed (RoseF a tree) = Rose a tree

View File

@ -1,17 +1,11 @@
{-# LANGUAGE DataKinds, GADTs, KindSignatures, MultiParamTypeClasses, TypeOperators, ConstraintKinds #-}
{-# LANGUAGE DataKinds, GADTs, KindSignatures, MultiParamTypeClasses, TypeOperators #-}
module Data.Record where
import Category
import Data.Aeson
import Data.Aeson.Types
import Data.Functor.Listable
import GHC.Show
import Prologue
import Range
import SourceSpan
-- | A type alias for HasField constraints commonly used throughout semantic-diff.
type DefaultFields fields = (HasField fields Category, HasField fields Range, HasField fields SourceSpan)
-- | A type-safe, extensible record structure.
-- |

View File

@ -1,180 +0,0 @@
{-# LANGUAGE DataKinds #-}
{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
module DiffCommand where
import Data.Aeson hiding (json)
import Data.Functor.Both as Both
import Data.List ((\\))
import Data.String
import GHC.Conc (numCapabilities)
import Prologue hiding (fst, snd, null)
import qualified Control.Concurrent.Async.Pool as Async
import System.FilePath.Posix (hasExtension)
import Git.Blob
import Git.Libgit2
import Git.Libgit2.Backend
import Git.Repository
import Git.Types
import qualified Git
import Arguments
import Category
import Data.RandomWalkSimilarity
import Data.Record
import GitmonClient
import Info
import Diff
import Interpreter
import ParseCommand (parserForFilepath)
import Parser
import Patch
import Renderer
import Renderer.JSON
import Renderer.Patch
import Renderer.SExpression
import Renderer.Split
import Renderer.Summary
import Renderer.TOC
import Source
import Syntax
import Debug.Trace
diff :: Arguments -> IO ByteString
diff args@Arguments{..} = case diffMode of
PathDiff paths -> diffPaths args paths
CommitDiff -> diffCommits args
-- | Compare changes between two commits.
diffCommits :: Arguments -> IO ByteString
diffCommits args@Arguments{..} = do
outputs <- fetchDiffs args
pure $! concatOutputs outputs
-- | Compare two paths on the filesystem (compariable to git diff --no-index).
diffPaths :: Arguments -> Both FilePath -> IO ByteString
diffPaths args@Arguments{..} paths = do
sources <- traverse readAndTranscodeFile paths
let sourceBlobs = SourceBlob <$> sources <*> pure mempty <*> paths <*> pure (Just defaultPlainBlob)
printDiff (parserForFilepath path) args sourceBlobs
where
path = fromMaybe (panic "none of the paths have file extensions") $ find hasExtension paths
fetchDiffs :: Arguments -> IO [Output]
fetchDiffs args@Arguments{..} = do
paths <- case(filePaths, shaRange) of
([], Join (Just a, Just b)) -> pathsToDiff args (both a b)
(ps, _) -> pure ps
diffs <- Async.withTaskGroup numCapabilities . flip Async.mapTasks $
fetchDiff args <$> paths
pure $ uncurry (renderDiff args) <$> diffs
fetchDiff :: Arguments -> FilePath -> IO (Both SourceBlob, SyntaxDiff Text '[Range, Category, SourceSpan])
fetchDiff args@Arguments{..} filepath = withRepository lgFactory gitDir $ do
repo <- getRepository
for_ alternateObjectDirs (liftIO . odbBackendAddPath repo . toS)
lift $ runReaderT (go args filepath) repo
where
go :: Arguments -> FilePath -> ReaderT LgRepo IO (Both SourceBlob, SyntaxDiff Text '[Range, Category, SourceSpan])
go Arguments{..} filepath = do
liftIO $ traceEventIO ("START fetchDiff: " <> filepath)
sourcesAndOids <- sequence $ traverse (getSourceBlob filepath) <$> shaRange
let sources = fromMaybe (emptySourceBlob filepath) <$> sourcesAndOids
let sourceBlobs = idOrEmptySourceBlob <$> sources
diff <- liftIO $ diffFiles (parserForFilepath filepath) sourceBlobs
pure $! traceEvent ("END fetchDiff: " <> filepath) (sourceBlobs, diff)
-- | Returns a list of relative file paths that have changed between the given commit shas.
pathsToDiff :: Arguments -> Both String -> IO [FilePath]
pathsToDiff Arguments{..} shas = withRepository lgFactory gitDir $ do
repo <- getRepository
for_ alternateObjectDirs (liftIO . odbBackendAddPath repo . toS)
lift $ runReaderT (go shas) repo
where
go :: Both String -> ReaderT LgRepo IO [FilePath]
go shas = do
entries <- blobEntriesToDiff shas
pure $ (\(p, _, _) -> toS p) <$> entries
-- | Returns a list of blob entries that have changed between the given commits shas.
blobEntriesToDiff :: Both String -> ReaderT LgRepo IO [(TreeFilePath, Git.BlobOid LgRepo, BlobKind)]
blobEntriesToDiff shas = do
a <- blobEntries (fst shas)
b <- blobEntries (snd shas)
pure $ (a \\ b) <> (b \\ a)
where blobEntries sha = treeForCommitSha sha >>= treeBlobEntries'
treeBlobEntries' tree = reportGitmon "ls-tree" $ treeBlobEntries tree
-- | Returns a Git.Tree for a commit sha
treeForCommitSha :: String -> ReaderT LgRepo IO (Git.Tree LgRepo)
treeForCommitSha sha = do
object <- parseObjOid (toS sha)
commit <- reportGitmon "cat-file" $ lookupCommit object
reportGitmon "cat-file" $ lookupTree (commitTree commit)
-- | Returns a SourceBlob given a relative file path, and the sha to look up.
getSourceBlob :: FilePath -> String -> ReaderT LgRepo IO Source.SourceBlob
getSourceBlob path sha = do
tree <- treeForCommitSha sha
entry <- reportGitmon "ls-tree" $ treeEntry tree (toS path)
(bytestring, oid, mode) <- case entry of
Nothing -> pure (mempty, mempty, Nothing)
Just (BlobEntry entryOid entryKind) -> do
blob <- reportGitmon "cat-file" $ lookupBlob entryOid
s <- blobToByteString blob
let oid = renderObjOid $ blobOid blob
pure (s, oid, Just entryKind)
s <- liftIO $ transcode bytestring
pure $ Source.SourceBlob s (toS oid) path (toSourceKind <$> mode)
where
toSourceKind :: Git.BlobKind -> SourceKind
toSourceKind (Git.PlainBlob mode) = Source.PlainBlob mode
toSourceKind (Git.ExecutableBlob mode) = Source.ExecutableBlob mode
toSourceKind (Git.SymlinkBlob mode) = Source.SymlinkBlob mode
-- | Given a parser, diff two sources and return a SyntaxDiff.
-- | Returns the rendered result strictly, so it's always fully evaluated with respect to other IO actions.
diffFiles :: (HasField fields Category, NFData (Record fields))
=> Parser (Syntax Text) (Record fields)
-> Both SourceBlob
-> IO (SyntaxDiff Text fields)
diffFiles parse sourceBlobs = do
traceEventIO $ "diffFiles@SEMANTIC-DIFF START parse terms: " <> paths
terms <- Async.withTaskGroup numCapabilities . flip Async.mapTasks $
(fmap (defaultFeatureVectorDecorator getLabel) . parse) <$> sourceBlobs
traceEventIO $ "diffFiles@SEMANTIC-DIFF END parse terms: " <> paths
traceEventIO $ "diffFiles@SEMANTIC-DIFF START diff terms: " <> paths
traceEvent ("diffFiles@SEMANTIC-DIFF END diff terms: " <> paths) pure $!! stripDiff (diffTerms' terms)
where
diffTerms' terms = case runBothWith areNullOids sourceBlobs of
(True, False) -> pure $ Insert (snd terms)
(False, True) -> pure $ Delete (fst terms)
(_, _) ->
runBothWith diffTerms terms
areNullOids a b = (hasNullOid a, hasNullOid b)
hasNullOid blob = oid blob == nullOid || null (source blob)
-- For trace events
paths = runBothWith (\a b -> fileAtSha a <> " -> " <> fileAtSha b) sourceBlobs
fileAtSha x = path x <> "@" <> toS (oid x)
getLabel :: HasField fields Category => CofreeF (Syntax leaf) (Record fields) b -> (Category, Maybe leaf)
getLabel (h :< t) = (category h, case t of
Leaf s -> Just s
_ -> Nothing)
-- | Returns a rendered diff given arguments and two source blobs.
renderDiff :: (ToJSON (Record fields), NFData (Record fields), DefaultFields fields) => Arguments -> Both SourceBlob -> SyntaxDiff Text fields -> Output
renderDiff args = case format args of
Split -> split
Patch -> patch
SExpression -> sExpression TreeOnly
JSON -> json
Summary -> summary
TOC -> toc
-- | Prints a rendered diff to stdio or a filepath given a parser, arguments and two source blobs.
printDiff :: (ToJSON (Record fields), NFData (Record fields), DefaultFields fields) => Parser (Syntax Text) (Record fields) -> Arguments -> Both SourceBlob -> IO ByteString
printDiff parser args sources = do
diff <- diffFiles parser sources
pure $! concatOutputs [renderDiff args sources diff]

View File

@ -1,6 +1,8 @@
{-# LANGUAGE DataKinds, GeneralizedNewtypeDeriving #-}
{-# LANGUAGE ConstraintKinds, DataKinds, GeneralizedNewtypeDeriving #-}
module Info
( Range(..)
( DefaultFields
, HasDefaultFields
, Range(..)
, byteRange
, setCharacterRange
, Category(..)
@ -22,6 +24,12 @@ import Range
import SourceSpan
import Data.Aeson
-- | The default set of fields produced by our parsers.
type DefaultFields = '[ Range, Category, SourceSpan ]
-- | A type alias for HasField constraints commonly used throughout semantic-diff.
type HasDefaultFields fields = (HasField fields Category, HasField fields Range, HasField fields SourceSpan)
newtype SourceText = SourceText { unText :: Text }
deriving (Show, ToJSON)

View File

@ -2,6 +2,7 @@
module Language where
import Data.Record
import Data.String
import Info
import Prologue
import qualified Syntax as S
@ -10,28 +11,15 @@ import Term
-- | A programming language.
data Language =
C
| CoffeeScript
| CPlusPlus
| CSharp
| CSS
| Haskell
| HTML
| Java
| Go
| JavaScript
| Markdown
| ObjectiveC
| Perl
| PHP
| Python
| R
| Ruby
| Swift
| TypeScript
| Go
deriving (Show)
-- | Returns a Language based on the file extension (including the ".").
languageForType :: Text -> Maybe Language
languageForType :: String -> Maybe Language
languageForType mediaType = case mediaType of
".h" -> Just C
".c" -> Just C

View File

@ -10,8 +10,8 @@ import Term
termAssignment
:: Source -- ^ The source of the term.
-> Category -- ^ The category for the term.
-> [ SyntaxTerm Text '[Range, Category, SourceSpan] ] -- ^ The child nodes of the term.
-> Maybe (S.Syntax Text (SyntaxTerm Text '[Range, Category, SourceSpan])) -- ^ The resulting term, in Maybe.
-> [ SyntaxTerm Text DefaultFields ] -- ^ The child nodes of the term.
-> Maybe (S.Syntax Text (SyntaxTerm Text DefaultFields)) -- ^ The resulting term, in Maybe.
termAssignment _ _ _ = Nothing

View File

@ -10,8 +10,8 @@ import qualified Syntax as S
termAssignment
:: Source -- ^ The source of the term.
-> Category -- ^ The category for the term.
-> [ SyntaxTerm Text '[Range, Category, SourceSpan] ] -- ^ The child nodes of the term.
-> Maybe (S.Syntax Text (SyntaxTerm Text '[Range, Category, SourceSpan])) -- ^ The resulting term, in Maybe.
-> [ SyntaxTerm Text DefaultFields ] -- ^ The child nodes of the term.
-> Maybe (S.Syntax Text (SyntaxTerm Text DefaultFields)) -- ^ The resulting term, in Maybe.
termAssignment source category children = case (category, children) of
(Module, [moduleName]) -> Just $ S.Module moduleName []
(Import, [importName]) -> Just $ S.Import importName []

View File

@ -11,8 +11,8 @@ import Term
termAssignment
:: Source -- ^ The source of the term.
-> Category -- ^ The category for the term.
-> [ SyntaxTerm Text '[Range, Category, SourceSpan] ] -- ^ The child nodes of the term.
-> Maybe (S.Syntax Text (SyntaxTerm Text '[Range, Category, SourceSpan])) -- ^ The resulting term, in Maybe.
-> [ SyntaxTerm Text DefaultFields ] -- ^ The child nodes of the term.
-> Maybe (S.Syntax Text (SyntaxTerm Text DefaultFields)) -- ^ The resulting term, in Maybe.
termAssignment _ category children
= case (category, children) of
(Assignment, [ identifier, value ]) -> Just $ S.Assignment identifier value

View File

@ -10,9 +10,9 @@ import Prologue
import Source
import Syntax
cmarkParser :: Parser (Syntax Text) (Record '[Range, Category, SourceSpan])
cmarkParser :: Parser (Syntax Text) (Record DefaultFields)
cmarkParser SourceBlob{..} = pure . toTerm (totalRange source) (rangeToSourceSpan source $ totalRange source) $ commonmarkToNode [ optSourcePos, optSafe ] (toText source)
where toTerm :: Range -> SourceSpan -> Node -> Cofree (Syntax Text) (Record '[Range, Category, SourceSpan])
where toTerm :: Range -> SourceSpan -> Node -> Cofree (Syntax Text) (Record DefaultFields)
toTerm within withinSpan (Node position t children) =
let
range = maybe within (sourceSpanToRange source . toSpan) position

View File

@ -12,8 +12,8 @@ import Term
termAssignment
:: Source -- ^ The source of the term.
-> Category -- ^ The category for the term.
-> [ SyntaxTerm Text '[Range, Category, SourceSpan] ] -- ^ The child nodes of the term.
-> Maybe (S.Syntax Text (SyntaxTerm Text '[Range, Category, SourceSpan])) -- ^ The resulting term, in Maybe.
-> [ SyntaxTerm Text DefaultFields ] -- ^ The child nodes of the term.
-> Maybe (S.Syntax Text (SyntaxTerm Text DefaultFields)) -- ^ The resulting term, in Maybe.
termAssignment _ category children
= case (category, children) of
(ArgumentPair, [ k, v ] ) -> Just $ S.Pair k v

View File

@ -11,8 +11,8 @@ import Term
termAssignment
:: Source -- ^ The source of the term.
-> Category -- ^ The category for the term.
-> [ SyntaxTerm Text '[Range, Category, SourceSpan] ] -- ^ The child nodes of the term.
-> Maybe (S.Syntax Text (SyntaxTerm Text '[Range, Category, SourceSpan])) -- ^ The resulting term, in Maybe.
-> [ SyntaxTerm Text DefaultFields ] -- ^ The child nodes of the term.
-> Maybe (S.Syntax Text (SyntaxTerm Text DefaultFields)) -- ^ The resulting term, in Maybe.
termAssignment _ category children =
case (category, children) of
(Assignment, [ identifier, value ]) -> Just $ S.Assignment identifier value

View File

@ -1,85 +1,56 @@
module Renderer (Renderer, Output(..), concatOutputs, toSummaryKey, Format(..)) where
{-# LANGUAGE GADTs #-}
module Renderer
( DiffRenderer(..)
, runDiffRenderer
, Format(..)
, Summaries(..)
, File(..)
) where
import Data.Aeson (Value, encode)
import Data.Aeson (ToJSON, Value)
import Data.Functor.Both
import Data.Map as Map hiding (null)
import Data.Text.Encoding (encodeUtf8)
import qualified Data.ByteString as B
import Data.Functor.Listable
import Data.Record
import Diff
import Info
import Prologue
import Renderer.JSON as R
import Renderer.Patch as R
import Renderer.SExpression as R
import Renderer.Split as R
import Renderer.Summary as R
import Renderer.TOC as R
import Source (SourceBlob)
import Syntax
import Diff
-- | A function that will render a diff, given the two source blobs.
type Renderer annotation = Both SourceBlob -> Diff (Syntax Text) annotation -> Output
data DiffRenderer fields output where
SplitRenderer :: (HasField fields Category, HasField fields Range) => DiffRenderer fields File
PatchRenderer :: HasField fields Range => DiffRenderer fields File
JSONDiffRenderer :: (ToJSON (Record fields), HasField fields Category, HasField fields Range) => DiffRenderer fields (Map Text Value)
SummaryRenderer :: HasDefaultFields fields => DiffRenderer fields Summaries
SExpressionDiffRenderer :: (HasField fields Category, HasField fields SourceSpan) => SExpressionFormat -> DiffRenderer fields ByteString
ToCRenderer :: HasDefaultFields fields => DiffRenderer fields Summaries
runDiffRenderer :: Monoid output => DiffRenderer fields output -> [(Both SourceBlob, Diff (Syntax Text) (Record fields))] -> output
runDiffRenderer renderer = foldMap . uncurry $ case renderer of
SplitRenderer -> (File .) . R.split
PatchRenderer -> (File .) . R.patch
JSONDiffRenderer -> R.json
SummaryRenderer -> R.summary
SExpressionDiffRenderer format -> R.sExpression format
ToCRenderer -> R.toc
-- | The available types of diff rendering.
data Format = Split | Patch | JSON | Summary | SExpression | TOC | Index | ParseTree
deriving (Show)
data Output = SplitOutput Text | PatchOutput Text | JSONOutput (Map Text Value) | SummaryOutput (Map Text (Map Text [Value])) | SExpressionOutput ByteString | TOCOutput (Map Text (Map Text [Value]))
deriving (Show)
-- Returns a key representing the filename. If the filenames are different,
-- return 'before -> after'.
toSummaryKey :: Both FilePath -> Text
toSummaryKey = runBothWith $ \before after ->
toS $ case (before, after) of
("", after) -> after
(before, "") -> before
(before, after) | before == after -> after
(before, after) | not (null before) && not (null after) -> before <> " -> " <> after
(_, _) -> mempty
-- Concatenates a list of 'Output' depending on the output type.
-- For JSON, each file output is merged since they're uniquely keyed by filename.
-- For Summaries, each file output is merged into one 'Object' consisting of lists of
-- changes and errors.
-- Split and Patch output is appended together with newlines.
concatOutputs :: [Output] -> ByteString
concatOutputs list | isJSON list = toS . encode $ concatJSON list
where
concatJSON :: [Output] -> Map Text Value
concatJSON (JSONOutput hash : rest) = Map.union hash (concatJSON rest)
concatJSON _ = mempty
concatOutputs list | isSummary list = toS . encode $ concatSummaries list
where
concatSummaries :: [Output] -> Map Text (Map Text [Value])
concatSummaries (SummaryOutput hash : rest) = Map.unionWith (Map.unionWith (<>)) hash (concatSummaries rest)
concatSummaries (TOCOutput hash : rest) = Map.unionWith (Map.unionWith (<>)) hash (concatSummaries rest)
concatSummaries _ = mempty
concatOutputs list | isByteString list = B.intercalate "\n" (toByteString <$> list)
concatOutputs list | isText list = B.intercalate "\n" (encodeUtf8 . toText <$> list)
concatOutputs _ = mempty
isJSON :: [Output] -> Bool
isJSON (JSONOutput _ : _) = True
isJSON _ = False
isSummary :: [Output] -> Bool
isSummary (SummaryOutput _ : _) = True
isSummary (TOCOutput _ : _) = True
isSummary _ = False
isText :: [Output] -> Bool
isText (SplitOutput _ : _) = True
isText (PatchOutput _ : _) = True
isText _ = False
toText :: Output -> Text
toText (SplitOutput text) = text
toText (PatchOutput text) = text
toText _ = mempty
isByteString :: [Output] -> Bool
isByteString (SExpressionOutput _ : _) = True
isByteString _ = False
toByteString :: Output -> ByteString
toByteString (SExpressionOutput text) = text
toByteString _ = B.empty
newtype File = File { unFile :: Text }
deriving Show
instance Monoid File where
mempty = File mempty
mappend (File a) (File b) = File (a <> "\n" <> b)
instance Listable Format where
tiers = cons0 Split

View File

@ -9,12 +9,13 @@ import Alignment
import Category
import Data.Aeson as A hiding (json)
import Data.Bifunctor.Join
import Data.Functor.Both
import Data.Record
import qualified Data.Text as T
import Data.These
import Data.Vector as Vector hiding (toList)
import Diff
import Info
import Renderer
import Source
import SplitDiff
import Syntax as S
@ -22,8 +23,8 @@ import Term
import qualified Data.Map as Map
-- | Render a diff to a string representing its JSON.
json :: (ToJSON (Record fields), HasField fields Category, HasField fields Range) => Renderer (Record fields)
json blobs diff = JSONOutput $ Map.fromList [
json :: (ToJSON (Record fields), HasField fields Category, HasField fields Range) => Both SourceBlob -> Diff (Syntax Text) (Record fields) -> Map Text Value
json blobs diff = Map.fromList [
("rows", toJSON (annotateRows (alignDiff (source <$> blobs) diff))),
("oids", toJSON (oid <$> blobs)),
("paths", toJSON (path <$> blobs)) ]

View File

@ -16,9 +16,9 @@ import Diff
import Patch
import Prologue hiding (fst, snd)
import Range
import Renderer
import qualified Source
import Source hiding (break, length, null)
import Syntax
import SplitDiff
-- | Render a timed out file as a truncated diff.
@ -26,8 +26,8 @@ truncatePatch :: Both SourceBlob -> Text
truncatePatch blobs = header blobs <> "#timed_out\nTruncating diff: timeout reached.\n"
-- | Render a diff in the traditional patch format.
patch :: HasField fields Range => Renderer (Record fields)
patch blobs diff = PatchOutput $ if not (Text.null text) && Text.last text /= '\n'
patch :: HasField fields Range => Both SourceBlob -> Diff (Syntax Text) (Record fields) -> Text
patch blobs diff = if not (Text.null text) && Text.last text /= '\n'
then text <> "\n\\ No newline at end of file\n"
else text
where text = header blobs <> mconcat (showHunk blobs <$> hunks diff blobs)

View File

@ -2,21 +2,22 @@
module Renderer.SExpression (sExpression, printTerm, printTerms, SExpressionFormat(..)) where
import Data.Bifunctor.Join
import Data.Record
import Data.ByteString hiding (foldr, spanEnd)
import Data.Functor.Both
import Data.Record
import Prologue hiding (replicate, encodeUtf8)
import Category as C
import Diff
import Renderer
import Patch
import Info
import Source
import Syntax
import Term
data SExpressionFormat = TreeOnly | TreeAndRanges
sExpression :: (HasField fields Category, HasField fields SourceSpan) => SExpressionFormat -> Renderer (Record fields)
sExpression format _ diff = SExpressionOutput $ printDiff diff 0 format
sExpression :: (HasField fields Category, HasField fields SourceSpan) => SExpressionFormat -> Both SourceBlob -> Diff (Syntax Text) (Record fields) -> ByteString
sExpression format _ diff = printDiff diff 0 format
printDiff :: (HasField fields Category, HasField fields SourceSpan) => Diff (Syntax Text) (Record fields) -> Int -> SExpressionFormat -> ByteString
printDiff diff level format = case runFree diff of

View File

@ -10,10 +10,10 @@ import Data.Functor.Both
import Data.Record
import qualified Data.Text.Lazy as TL
import Data.These
import Diff
import Info
import Prologue hiding (div, head, fst, snd, link, (<>))
import qualified Prologue
import Renderer
import Source
import SplitDiff
import Syntax
@ -161,8 +161,8 @@ splitPatchToClassName patch = stringValue $ "patch " <> case patch of
SplitReplace _ -> "replace"
-- | Render a diff as an HTML split diff.
split :: (HasField fields Category, HasField fields Range) => Renderer (Record fields)
split blobs diff = SplitOutput . TL.toStrict . renderHtml
split :: (HasField fields Category, HasField fields Range) => Both SourceBlob -> Diff (Syntax Text) (Record fields) -> Text
split blobs diff = TL.toStrict . renderHtml
. docTypeHtml
. ((head $ link ! A.rel "stylesheet" ! A.href "style.css") <>)
. body

View File

@ -2,13 +2,13 @@
-- {-# OPTIONS_GHC -funbox-strict-fields #-}
{-# OPTIONS_GHC -Wno-deprecations #-}
-- Disabling deprecation warnings due to pattern match against RescueModifier.
module Renderer.Summary (summary, diffSummaries, DiffSummary(..), DiffInfo(..), diffToDiffSummaries, isBranchInfo, isErrorSummary, JSONSummary(..)) where
module Renderer.Summary (Summaries(..), summary, diffSummaries, DiffSummary(..), DiffInfo(..), diffToDiffSummaries, isBranchInfo, isErrorSummary, JSONSummary(..)) where
import Prologue
import Diff
import Patch
import Term
import Info (category, byteRange)
import Info (HasDefaultFields, category, byteRange)
import Range
import Syntax as S
import Category as C
@ -25,10 +25,19 @@ import qualified Text.PrettyPrint.Leijen.Text as P
import Data.Aeson
import SourceSpan
import Source hiding (null)
import Renderer
import qualified Data.Map as Map
import qualified Data.List as List
data Summaries = Summaries { changes, errors :: !(Map Text [Value]) }
deriving Show
instance Monoid Summaries where
mempty = Summaries mempty mempty
mappend (Summaries c1 e1) (Summaries c2 e2) = Summaries (Map.unionWith (<>) c1 c2) (Map.unionWith (<>) e1 e2)
instance ToJSON Summaries where
toJSON Summaries{..} = object [ "changes" .= changes, "errors" .= errors ]
data Annotatable a = Annotatable a | Unannotatable a
annotatable :: SyntaxTerm leaf fields -> Annotatable (SyntaxTerm leaf fields)
@ -101,11 +110,8 @@ data DiffSummary a = DiffSummary {
parentAnnotation :: [Either (Category, Text) (Category, Text)]
} deriving (Eq, Functor, Show, Generic)
summary :: (DefaultFields fields) => Renderer (Record fields)
summary blobs diff = SummaryOutput $ Map.fromList [
("changes", changes),
("errors", errors)
]
summary :: HasDefaultFields fields => Both SourceBlob -> Diff (Syntax Text) (Record fields) -> Summaries
summary blobs diff = Summaries changes errors
where
changes = if null changes' then mempty else Map.singleton summaryKey (toJSON <$> changes')
errors = if null errors' then mempty else Map.singleton summaryKey (toJSON <$> errors')
@ -113,8 +119,19 @@ summary blobs diff = SummaryOutput $ Map.fromList [
summaryKey = toSummaryKey (path <$> blobs)
summaries = diffSummaries blobs diff
-- Returns a key representing the filename. If the filenames are different,
-- return 'before -> after'.
toSummaryKey :: Both FilePath -> Text
toSummaryKey = runBothWith $ \before after ->
toS $ case (before, after) of
("", after) -> after
(before, "") -> before
(before, after) | before == after -> after
(before, after) | not (null before) && not (null after) -> before <> " -> " <> after
(_, _) -> mempty
-- Returns a list of diff summary texts given two source blobs and a diff.
diffSummaries :: (StringConv leaf Text, DefaultFields fields) => Both SourceBlob -> SyntaxDiff leaf fields -> [JSONSummary Text SourceSpans]
diffSummaries :: (StringConv leaf Text, HasDefaultFields fields) => Both SourceBlob -> SyntaxDiff leaf fields -> [JSONSummary Text SourceSpans]
diffSummaries blobs diff = summaryToTexts =<< diffToDiffSummaries (source <$> blobs) diff
-- Takes a 'DiffSummary DiffInfo' and returns a list of JSON Summaries whose text summaries represent the LeafInfo summaries of the 'DiffSummary'.
@ -124,7 +141,7 @@ summaryToTexts DiffSummary{..} = appendParentContexts <$> jsonDocSummaries diffS
jsonSummary { info = show $ info jsonSummary <+> parentContexts parentAnnotation }
-- Returns a list of 'DiffSummary' given two source blobs and a diff.
diffToDiffSummaries :: (StringConv leaf Text, DefaultFields fields) => Both Source -> SyntaxDiff leaf fields -> [DiffSummary DiffInfo]
diffToDiffSummaries :: (StringConv leaf Text, HasDefaultFields fields) => Both Source -> SyntaxDiff leaf fields -> [DiffSummary DiffInfo]
diffToDiffSummaries sources = para $ \diff ->
let
diff' = free (Prologue.fst <$> diff)
@ -204,7 +221,7 @@ toLeafInfos LeafInfo{..} = pure $ JSONSummary (summary leafCategory termName) so
vowels = Text.singleton <$> ("aeiouAEIOU" :: [Char])
-- Returns a text representing a specific term given a source and a term.
toTermName :: forall leaf fields. (StringConv leaf Text, DefaultFields fields) => Source -> SyntaxTerm leaf fields -> Text
toTermName :: forall leaf fields. (StringConv leaf Text, HasDefaultFields fields) => Source -> SyntaxTerm leaf fields -> Text
toTermName source term = case unwrap term of
S.Send _ _ -> termNameFromSource term
S.Ty _ -> termNameFromSource term
@ -344,7 +361,7 @@ parentContexts contexts = hsep $ either identifiableDoc annotatableDoc <$> conte
toDoc :: Text -> Doc
toDoc = string . toS
termToDiffInfo :: (StringConv leaf Text, DefaultFields fields) => Source -> SyntaxTerm leaf fields -> DiffInfo
termToDiffInfo :: (StringConv leaf Text, HasDefaultFields fields) => Source -> SyntaxTerm leaf fields -> DiffInfo
termToDiffInfo blob term = case unwrap term of
S.Indexed children -> BranchInfo (termToDiffInfo' <$> children) (category $ extract term) BIndexed
S.Fixed children -> BranchInfo (termToDiffInfo' <$> children) (category $ extract term) BFixed
@ -361,7 +378,7 @@ termToDiffInfo blob term = case unwrap term of
-- | For a DiffSummary without a parentAnnotation, we append a parentAnnotation with the first identifiable term.
-- | For a DiffSummary with a parentAnnotation, we append the next annotatable term to the extant parentAnnotation.
-- | If a DiffSummary already has a parentAnnotation, and a (grand) parentAnnotation, then we return the summary without modification.
appendSummary :: (StringConv leaf Text, DefaultFields fields) => Source -> SyntaxTerm leaf fields -> DiffSummary DiffInfo -> DiffSummary DiffInfo
appendSummary :: (StringConv leaf Text, HasDefaultFields fields) => Source -> SyntaxTerm leaf fields -> DiffSummary DiffInfo -> DiffSummary DiffInfo
appendSummary source term summary =
case (parentAnnotation summary, identifiable term, annotatable term) of
([], Identifiable _, _) -> appendParentAnnotation Left

View File

@ -11,9 +11,9 @@ import Diff
import Info
import Prologue
import Range
import Renderer.Summary (Summaries(..))
import qualified Data.List as List
import qualified Data.Map as Map hiding (null)
import Renderer
import Source hiding (null)
import Syntax as S
import Term
@ -51,11 +51,8 @@ data Summarizable = Summarizable { summarizableCategory :: Category, summarizabl
data SummarizableTerm a = SummarizableTerm a | NotSummarizableTerm a
toc :: (DefaultFields fields) => Renderer (Record fields)
toc blobs diff = TOCOutput $ Map.fromList [
("changes", changes),
("errors", errors)
]
toc :: HasDefaultFields fields => Both SourceBlob -> Diff (Syntax Text) (Record fields) -> Summaries
toc blobs diff = Summaries changes errors
where
changes = if null changes' then mempty else Map.singleton summaryKey (toJSON <$> changes')
errors = if null errors' then mempty else Map.singleton summaryKey (toJSON <$> errors')
@ -63,7 +60,18 @@ toc blobs diff = TOCOutput $ Map.fromList [
summaryKey = toSummaryKey (path <$> blobs)
summaries = diffTOC blobs diff
diffTOC :: (StringConv leaf Text, DefaultFields fields) => Both SourceBlob -> SyntaxDiff leaf fields -> [JSONSummary]
-- Returns a key representing the filename. If the filenames are different,
-- return 'before -> after'.
toSummaryKey :: Both FilePath -> Text
toSummaryKey = runBothWith $ \before after ->
toS $ case (before, after) of
("", after) -> after
(before, "") -> before
(before, after) | before == after -> after
(before, after) | not (null before) && not (null after) -> before <> " -> " <> after
(_, _) -> mempty
diffTOC :: (StringConv leaf Text, HasDefaultFields fields) => Both SourceBlob -> SyntaxDiff leaf fields -> [JSONSummary]
diffTOC blobs diff = removeDupes (diffToTOCSummaries (source <$> blobs) diff) >>= toJSONSummaries
where
removeDupes :: [TOCSummary DiffInfo] -> [TOCSummary DiffInfo]
@ -83,7 +91,7 @@ diffTOC blobs diff = removeDupes (diffToTOCSummaries (source <$> blobs) diff) >>
(Summarizable catA nameA _ _, Summarizable catB nameB _ _) -> catA == catB && toLower nameA == toLower nameB
(_, _) -> False
diffToTOCSummaries :: (StringConv leaf Text, DefaultFields fields) => Both Source -> SyntaxDiff leaf fields -> [TOCSummary DiffInfo]
diffToTOCSummaries :: (StringConv leaf Text, HasDefaultFields fields) => Both Source -> SyntaxDiff leaf fields -> [TOCSummary DiffInfo]
diffToTOCSummaries sources = para $ \diff ->
let
diff' = free (Prologue.fst <$> diff)
@ -118,7 +126,7 @@ toLeafInfos' :: DiffInfo -> [DiffInfo]
toLeafInfos' BranchInfo{..} = branches >>= toLeafInfos'
toLeafInfos' leaf = [leaf]
mapToInSummarizable :: forall leaf fields. DefaultFields fields => Both Source -> SyntaxDiff leaf fields -> [TOCSummary DiffInfo] -> [TOCSummary DiffInfo]
mapToInSummarizable :: forall leaf fields. HasDefaultFields fields => Both Source -> SyntaxDiff leaf fields -> [TOCSummary DiffInfo] -> [TOCSummary DiffInfo]
mapToInSummarizable sources diff children = case (beforeTerm diff, afterTerm diff) of
(_, Just diff') -> mapToInSummarizable' (Both.snd sources) diff' <$> children
(Just diff', _) -> mapToInSummarizable' (Both.fst sources) diff' <$> children
@ -150,7 +158,7 @@ toJSONSummaries TOCSummary{..} = case afterOrBefore summaryPatch of
NotSummarizable -> []
_ -> pure $ JSONSummary parentInfo
termToDiffInfo :: forall leaf fields. (StringConv leaf Text, DefaultFields fields) => Source -> SyntaxTerm leaf fields -> DiffInfo
termToDiffInfo :: forall leaf fields. (StringConv leaf Text, HasDefaultFields fields) => Source -> SyntaxTerm leaf fields -> DiffInfo
termToDiffInfo source term = case unwrap term of
S.Indexed children -> BranchInfo (termToDiffInfo' <$> children) (category $ extract term)
S.Fixed children -> BranchInfo (termToDiffInfo' <$> children) (category $ extract term)
@ -163,7 +171,7 @@ termToDiffInfo source term = case unwrap term of
termToDiffInfo' = termToDiffInfo source
toLeafInfo term = LeafInfo (category $ extract term) (toTermName' term) (getField $ extract term)
toTermName :: forall leaf fields. DefaultFields fields => Int -> Source -> SyntaxTerm leaf fields -> Text
toTermName :: forall leaf fields. HasDefaultFields fields => Int -> Source -> SyntaxTerm leaf fields -> Text
toTermName parentOffset parentSource term = case unwrap term of
S.Function identifier _ _ -> toTermName' identifier
S.Method _ identifier Nothing _ _ -> toTermName' identifier

View File

@ -2,29 +2,56 @@
module SemanticDiff (main) where
import Arguments
import Prologue hiding (fst, snd)
import Data.String
import Command
import Command.Parse
import Development.GitRev
import Data.Aeson
import qualified Data.ByteString as B
import Data.Functor.Both
import Data.String
import Data.Version (showVersion)
import Text.Regex
import Options.Applicative hiding (action)
import qualified Paths_semantic_diff as Library (version)
import Prologue hiding (concurrently, fst, snd, readFile)
import qualified Renderer as R
import Development.GitRev
import DiffCommand
import ParseCommand
import qualified Data.ByteString as B
import qualified Renderer.SExpression as R
import Source
import Text.Regex
main :: IO ()
main = do
args@Arguments{..} <- programArguments =<< execParser argumentsParser
text <- case runMode of
Diff -> diff args
Diff -> runCommand $ do
let render = case format of
R.Split -> fmap encodeText . renderDiffs R.SplitRenderer
R.Patch -> fmap encodeText . renderDiffs R.PatchRenderer
R.JSON -> fmap encodeJSON . renderDiffs R.JSONDiffRenderer
R.Summary -> fmap encodeSummaries . renderDiffs R.SummaryRenderer
R.SExpression -> renderDiffs (R.SExpressionDiffRenderer R.TreeOnly)
R.TOC -> fmap encodeSummaries . renderDiffs R.ToCRenderer
_ -> fmap encodeText . renderDiffs R.PatchRenderer
diffs <- case diffMode of
PathDiff paths -> do
blobs <- traverse readFile paths
terms <- traverse (traverse parseBlob) blobs
diff' <- maybeDiff terms
return [(fromMaybe . emptySourceBlob <$> paths <*> blobs, diff')]
CommitDiff -> do
blobPairs <- readFilesAtSHAs gitDir alternateObjectDirs filePaths (fromMaybe (toS nullOid) <$> shaRange)
concurrently blobPairs . uncurry $ \ path blobs -> do
terms <- concurrently blobs (traverse parseBlob)
diff' <- maybeDiff terms
return (fromMaybe <$> pure (emptySourceBlob path) <*> blobs, diff')
render (diffs >>= \ (blobs, diff) -> (,) blobs <$> toList diff)
Parse -> case format of
R.Index -> parseIndex args
R.SExpression -> parseSExpression args
_ -> parseTree args
writeToOutput outputPath (text <> "\n")
writeToOutput outputPath text
where encodeText = encodeUtf8 . R.unFile
encodeJSON = toS . (<> "\n") . encode
encodeSummaries = toS . (<> "\n") . encode
-- | A parser for the application's command-line arguments.
argumentsParser :: ParserInfo CmdLineOptions

View File

@ -65,6 +65,9 @@ defaultPlainBlob = PlainBlob 0o100644
emptySourceBlob :: FilePath -> SourceBlob
emptySourceBlob filepath = SourceBlob Source.empty Source.nullOid filepath Nothing
nullBlob :: SourceBlob -> Bool
nullBlob SourceBlob{..} = oid == nullOid || Source.null source
sourceBlob :: Source -> FilePath -> SourceBlob
sourceBlob source filepath = SourceBlob source Source.nullOid filepath (Just defaultPlainBlob)

View File

@ -1,4 +1,4 @@
{-# LANGUAGE DeriveAnyClass, ScopedTypeVariables, DataKinds, KindSignatures #-}
{-# LANGUAGE DeriveAnyClass #-}
module Syntax where
import Data.Record
@ -114,12 +114,12 @@ data Syntax a f
deriving (Eq, Foldable, Functor, Generic, Generic1, Mergeable, Ord, Show, Traversable, ToJSON, NFData)
extractLeafValue :: forall b leaf. Syntax leaf b -> Maybe leaf
extractLeafValue :: Syntax leaf b -> Maybe leaf
extractLeafValue syntax = case syntax of
Leaf a -> Just a
_ -> Nothing
maybeIdentifier :: forall leaf (fields :: [*]). (HasField fields Info.Category) => Syntax leaf (Cofree (Syntax leaf) (Record fields)) -> Maybe (Cofree (Syntax leaf) (Record fields))
maybeIdentifier :: HasField fields Info.Category => Syntax leaf (Cofree (Syntax leaf) (Record fields)) -> Maybe (Cofree (Syntax leaf) (Record fields))
maybeIdentifier syntax = case syntax of
Assignment f _ -> Just f
Class f _ _ -> Just f

View File

@ -29,7 +29,7 @@ import SourceSpan
import Info
-- | Returns a TreeSitter parser for the given language and TreeSitter grammar.
treeSitterParser :: Language -> Ptr TS.Language -> Parser (Syntax.Syntax Text) (Record '[Range, Category, SourceSpan])
treeSitterParser :: Language -> Ptr TS.Language -> Parser (Syntax.Syntax Text) (Record DefaultFields)
treeSitterParser language grammar blob = do
document <- ts_document_new
ts_document_set_language document grammar
@ -42,13 +42,13 @@ treeSitterParser language grammar blob = do
-- | Return a parser for a tree sitter language & document.
documentToTerm :: Language -> Ptr Document -> Parser (Syntax.Syntax Text) (Record '[Range, Category, SourceSpan])
documentToTerm :: Language -> Ptr Document -> Parser (Syntax.Syntax Text) (Record DefaultFields)
documentToTerm language document SourceBlob{..} = do
root <- alloca (\ rootPtr -> do
ts_document_root_node_p document rootPtr
peek rootPtr)
toTerm root source
where toTerm :: Node -> Source -> IO (Term (Syntax.Syntax Text) (Record '[Range, Category, SourceSpan]))
where toTerm :: Node -> Source -> IO (Term (Syntax.Syntax Text) (Record DefaultFields))
toTerm node source = do
name <- peekCString (nodeType node)
@ -77,7 +77,7 @@ nodeSpan :: Node -> SourceSpan
nodeSpan Node{..} = nodeStartPoint `seq` nodeEndPoint `seq` SourceSpan (pointPos nodeStartPoint) (pointPos nodeEndPoint)
where pointPos TSPoint{..} = pointRow `seq` pointColumn `seq` SourcePos (1 + fromIntegral pointRow) (1 + fromIntegral pointColumn)
assignTerm :: Language -> Source -> Record '[Range, Category, SourceSpan] -> [ SyntaxTerm Text '[ Range, Category, SourceSpan ] ] -> IO [ SyntaxTerm Text '[ Range, Category, SourceSpan ] ] -> IO (SyntaxTerm Text '[ Range, Category, SourceSpan ])
assignTerm :: Language -> Source -> Record DefaultFields -> [ SyntaxTerm Text '[ Range, Category, SourceSpan ] ] -> IO [ SyntaxTerm Text '[ Range, Category, SourceSpan ] ] -> IO (SyntaxTerm Text '[ Range, Category, SourceSpan ])
assignTerm language source annotation children allChildren =
cofree . (annotation :<) <$> case assignTermByLanguage language source (category annotation) children of
Just a -> pure a
@ -91,7 +91,7 @@ assignTerm language source annotation children allChildren =
TypeScript -> TS.termAssignment
_ -> \ _ _ _ -> Nothing
defaultTermAssignment :: Source -> Category -> [ SyntaxTerm Text '[Range, Category, SourceSpan] ] -> IO [ SyntaxTerm Text '[Range, Category, SourceSpan] ] -> IO (S.Syntax Text (SyntaxTerm Text '[Range, Category, SourceSpan]))
defaultTermAssignment :: Source -> Category -> [ SyntaxTerm Text DefaultFields ] -> IO [ SyntaxTerm Text DefaultFields ] -> IO (S.Syntax Text (SyntaxTerm Text DefaultFields))
defaultTermAssignment source category children allChildren
| category `elem` operatorCategories = S.Operator <$> allChildren
| otherwise = pure $! case (category, children) of
@ -137,7 +137,7 @@ categoryForLanguageProductionName = withDefaults . byLanguage
withDefaults productionMap name = case name of
"ERROR" -> ParseError
s -> productionMap s
byLanguage language = case language of
JavaScript -> JS.categoryForJavaScriptProductionName
C -> C.categoryForCProductionName

View File

@ -1,64 +1,58 @@
module DiffCommandSpec where
module Command.Diff.Spec where
import Command
import Data.Aeson
import Data.Aeson.Types
import Data.Maybe
import Data.Functor.Both
import Data.Map as Map
import Data.Maybe
import Data.Text.Lazy as T
import qualified Data.Vector as V
import qualified Git.Types as Git
import Info
import Prelude
import Prologue (($), fmap, (.), pure, for, panic)
import Renderer hiding (errors)
import Source
import Test.Hspec hiding (shouldBe, shouldNotBe, shouldThrow, errorCall)
import Test.Hspec.Expectations.Pretty
import Test.Hspec.LeanCheck
import Data.Text.Lazy as T
import qualified Data.ByteString.Lazy as BL
import Data.Map
import qualified Data.Vector as V
import Arguments
import DiffCommand
import Renderer
import qualified Git.Types as Git
spec :: Spec
spec = parallel $ do
context "diff" $ do
prop "all formats should produce output for file paths" $
\format -> do
output <- diff $ diffPathsArgs "" (both "test/fixtures/ruby/and-or.A.rb" "test/fixtures/ruby/and-or.B.rb") format
output `shouldNotBe` ""
prop "all formats should produce output for commit range" $
\format -> do
output <- diff $ args "test/fixtures/git/examples/all-languages.git" "dfac8fd681b0749af137aebf3203e77a06fbafc2" "2e4144eb8c44f007463ec34cb66353f0041161fe" ["methods.rb"] format
output `shouldNotBe` ""
describe "fetchDiffs" $ do
it "generates diff summaries for two shas" $ do
(errors, summaries) <- fetchDiffsOutput summaryText $ args "test/fixtures/git/examples/all-languages.git" "dfac8fd681b0749af137aebf3203e77a06fbafc2" "2e4144eb8c44f007463ec34cb66353f0041161fe" ["methods.rb"] Renderer.Summary
(errors, summaries) <- fetchDiffsOutput summaryText "test/fixtures/git/examples/all-languages.git" "dfac8fd681b0749af137aebf3203e77a06fbafc2" "2e4144eb8c44f007463ec34cb66353f0041161fe" ["methods.rb"] Renderer.SummaryRenderer
errors `shouldBe` Just (fromList [])
summaries `shouldBe` Just (fromList [("methods.rb", ["Added the 'foo()' method"])])
it "generates toc summaries for two shas" $ do
(errors, summaries) <- fetchDiffsOutput termText $ args "test/fixtures/git/examples/all-languages.git" "dfac8fd681b0749af137aebf3203e77a06fbafc2" "2e4144eb8c44f007463ec34cb66353f0041161fe" ["methods.rb"] Renderer.TOC
(errors, summaries) <- fetchDiffsOutput termText "test/fixtures/git/examples/all-languages.git" "dfac8fd681b0749af137aebf3203e77a06fbafc2" "2e4144eb8c44f007463ec34cb66353f0041161fe" ["methods.rb"] Renderer.ToCRenderer
errors `shouldBe` Just (fromList [])
summaries `shouldBe` Just (fromList [("methods.rb", ["foo"])])
it "generates toc summaries for two shas inferring paths" $ do
(errors, summaries) <- fetchDiffsOutput termText $ args "test/fixtures/git/examples/all-languages.git" "dfac8fd681b0749af137aebf3203e77a06fbafc2" "2e4144eb8c44f007463ec34cb66353f0041161fe" [] Renderer.TOC
(errors, summaries) <- fetchDiffsOutput termText "test/fixtures/git/examples/all-languages.git" "dfac8fd681b0749af137aebf3203e77a06fbafc2" "2e4144eb8c44f007463ec34cb66353f0041161fe" [] Renderer.ToCRenderer
errors `shouldBe` Just (fromList [])
summaries `shouldBe` Just (fromList [("methods.rb", ["foo"])])
it "errors with bad shas" $
fetchDiffsOutput summaryText (args "test/fixtures/git/examples/all-languages.git" "dead" "beef" ["methods.rb"] Renderer.Summary)
fetchDiffsOutput summaryText "test/fixtures/git/examples/all-languages.git" "dead" "beef" ["methods.rb"] Renderer.SummaryRenderer
`shouldThrow` (== Git.BackendError "Could not lookup dead: Object not found - no match for prefix (dead000000000000000000000000000000000000)")
it "errors with bad repo path" $
fetchDiffsOutput summaryText (args "test/fixtures/git/examples/not-a-repo.git" "dfac8fd681b0749af137aebf3203e77a06fbafc2" "2e4144eb8c44f007463ec34cb66353f0041161fe" ["methods.rb"] Renderer.Summary)
fetchDiffsOutput summaryText "test/fixtures/git/examples/not-a-repo.git" "dfac8fd681b0749af137aebf3203e77a06fbafc2" "2e4144eb8c44f007463ec34cb66353f0041161fe" ["methods.rb"] Renderer.SummaryRenderer
`shouldThrow` errorCall "Could not open repository \"test/fixtures/git/examples/not-a-repo.git\""
fetchDiffsOutput :: (Object -> Text) -> Arguments -> IO (Maybe (Map Text Value), Maybe (Map Text [Text]))
fetchDiffsOutput f arguments = do
diffs <- fetchDiffs arguments
let json = fromJust . decode . BL.fromStrict $ concatOutputs diffs
fetchDiffsOutput :: (Object -> Text) -> FilePath -> String -> String -> [FilePath] -> DiffRenderer DefaultFields Summaries -> IO (Maybe (Map Text Value), Maybe (Map Text [Text]))
fetchDiffsOutput f gitDir sha1 sha2 filePaths renderer = do
results <- fmap encode . runCommand $ do
blobs <- readFilesAtSHAs gitDir [] filePaths (both sha1 sha2)
diffs <- for blobs . uncurry $ \ path blobs -> do
terms <- traverse (traverse parseBlob) blobs
Just diff' <- maybeDiff terms
return (fromMaybe <$> pure (emptySourceBlob path) <*> blobs, diff')
renderDiffs renderer diffs
let json = fromJust (decode results)
pure (errors json, summaries f json)
-- Diff Summaries payloads look like this:

View File

@ -1,5 +1,6 @@
module ParseCommandSpec where
module Command.Parse.Spec where
import Command.Parse
import Data.Functor.Listable
import Prelude
import Test.Hspec hiding (shouldBe, shouldNotBe, shouldThrow, errorCall)
@ -7,7 +8,6 @@ import Test.Hspec.Expectations.Pretty
import Test.Hspec.LeanCheck
import Test.LeanCheck
import Arguments
import ParseCommand
import Renderer
spec :: Spec

51
test/Command/Spec.hs Normal file
View File

@ -0,0 +1,51 @@
module Command.Spec where
import Command
import Data.Functor.Both
import Data.String
import Language
import Prologue hiding (readFile)
import Source
import Syntax
import Test.Hspec
spec :: Spec
spec = parallel $ do
describe "readFile" $ do
it "returns a blob for extant files" $ do
blob <- runCommand (readFile "semantic-diff.cabal")
fmap path blob `shouldBe` Just "semantic-diff.cabal"
it "returns Nothing for absent files" $ do
blob <- runCommand (readFile "this file should not exist")
blob `shouldBe` Nothing
describe "readFilesAtSHAs" $ do
it "returns blobs for the specified paths" $ do
blobs <- runCommand (readFilesAtSHAs repoPath [] ["methods.rb"] (shas methodsFixture))
blobs `shouldBe` expectedBlobs methodsFixture
it "returns blobs for all paths if none are specified" $ do
blobs <- runCommand (readFilesAtSHAs repoPath [] [] (shas methodsFixture))
blobs `shouldBe` expectedBlobs methodsFixture
it "returns entries for missing paths" $ do
blobs <- runCommand (readFilesAtSHAs repoPath [] ["this file should not exist"] (shas methodsFixture))
blobs `shouldBe` [("this file should not exist", pure Nothing)]
describe "parse" $ do
it "parses line by line if not given a language" $ do
term <- runCommand (parse Nothing methodsBlob)
fmap (const ()) term `shouldBe` cofree (() :< Indexed [ cofree (() :< Leaf "def foo\n"), cofree (() :< Leaf "end\n"), cofree (() :< Leaf "") ])
it "parses in the specified language" $ do
term <- runCommand (parse (Just Ruby) methodsBlob)
fmap (const ()) term `shouldBe` cofree (() :< Indexed [ cofree (() :< Method [] (cofree (() :< Leaf "foo")) Nothing [] []) ])
where repoPath = "test/fixtures/git/examples/all-languages.git"
methodsFixture = Fixture
(both "dfac8fd681b0749af137aebf3203e77a06fbafc2" "2e4144eb8c44f007463ec34cb66353f0041161fe")
[ ("methods.rb", both Nothing (Just methodsBlob)) ]
methodsBlob = SourceBlob (Source "def foo\nend\n") "ff7bbbe9495f61d9e1e58c597502d152bab1761e" "methods.rb" (Just defaultPlainBlob)
data Fixture = Fixture { shas :: Both String, expectedBlobs :: [(FilePath, Both (Maybe SourceBlob))] }

View File

@ -1,21 +1,21 @@
{-# LANGUAGE DataKinds, GeneralizedNewtypeDeriving, OverloadedStrings #-}
module IntegrationSpec where
import Category as C
import Command
import Command.Parse
import Data.Functor.Both
import Data.List (union, concat, transpose)
import Data.Record
import qualified Data.Text as T
import qualified Data.ByteString as B
import Data.Text.Encoding (decodeUtf8)
import Diff
import GHC.Show (Show(..))
import Data.List (union, concat, transpose)
import Info
import ParseCommand
import Prologue hiding (fst, snd)
import Renderer
import Prologue hiding (fst, snd, readFile)
import Renderer.SExpression as Renderer
import Source
import DiffCommand
import Syntax
import System.FilePath
import System.FilePath.Glob
import Test.Hspec (Spec, describe, it, SpecWith, runIO, parallel)
@ -116,20 +116,17 @@ testParse path expectedOutput = do
expected <- (Verbatim . stripWhitespace) <$> B.readFile expectedOutput
actual `shouldBe` expected
testDiff :: Renderer (Record '[Range, Category, SourceSpan]) -> Both FilePath -> FilePath -> Expectation
testDiff :: (Both SourceBlob -> Diff (Syntax Text) (Record DefaultFields) -> ByteString) -> Both FilePath -> FilePath -> Expectation
testDiff renderer paths expectedOutput = do
sources <- traverse readAndTranscodeFile' paths
diff <- diffFiles parser (sourceBlobs sources)
let diffOutput = renderer (sourceBlobs sources) diff
let actual = (Verbatim . stripWhitespace. concatOutputs . pure) diffOutput
expected <- (Verbatim . stripWhitespace) <$> B.readFile expectedOutput
(blobs, diff') <- runCommand $ do
blobs <- traverse readFile paths
terms <- traverse (traverse parseBlob) blobs
Just diff' <- maybeDiff terms
return (fromMaybe . emptySourceBlob <$> paths <*> blobs, diff')
let diffOutput = renderer blobs diff'
let actual = Verbatim (stripWhitespace diffOutput)
expected <- Verbatim . stripWhitespace <$> B.readFile expectedOutput
actual `shouldBe` expected
where
parser = parserForFilepath filePath
sourceBlobs sources = Source.SourceBlob <$> sources <*> pure mempty <*> paths <*> pure (Just Source.defaultPlainBlob)
readAndTranscodeFile' path | Prologue.null path = pure Source.empty
| otherwise = readAndTranscodeFile path
filePath = if fst paths /= "" then fst paths else snd paths
stripWhitespace :: ByteString -> ByteString
stripWhitespace = B.foldl' go B.empty

View File

@ -2,6 +2,9 @@ module Main where
import Prologue
import qualified AlignmentSpec
import qualified Command.Spec
import qualified Command.Diff.Spec
import qualified Command.Parse.Spec
import qualified Data.Mergeable.Spec
import qualified Data.RandomWalkSimilarity.Spec
import qualified DiffSpec
@ -14,8 +17,6 @@ import qualified SES.Myers.Spec
import qualified SourceSpec
import qualified TermSpec
import qualified TOCSpec
import qualified DiffCommandSpec
import qualified ParseCommandSpec
import qualified IntegrationSpec
import Test.Hspec
@ -23,10 +24,13 @@ main :: IO ()
main = hspec $ do
parallel $ do
describe "Alignment" AlignmentSpec.spec
describe "Command" Command.Spec.spec
describe "Command.Diff" Command.Diff.Spec.spec
describe "Command.Parse" Command.Parse.Spec.spec
describe "Data.Mergeable" Data.Mergeable.Spec.spec
describe "Data.RandomWalkSimilarity" Data.RandomWalkSimilarity.Spec.spec
describe "Diff" DiffSpec.spec
describe "Summary" SummarySpec.spec
describe "Summary" SummarySpec.spfffec
describe "Interpreter" InterpreterSpec.spec
describe "PatchOutput" PatchOutputSpec.spec
describe "Range" RangeSpec.spec
@ -34,8 +38,7 @@ main = hspec $ do
describe "Source" SourceSpec.spec
describe "Term" TermSpec.spec
describe "TOC" TOCSpec.spec
describe "DiffCommand" DiffCommandSpec.spec
describe "ParseCommand" ParseCommandSpec.spec
describe "Integration" IntegrationSpec.spec
describe "GitmonClient" GitmonClientSpec.spec

View File

@ -3,16 +3,15 @@ module TOCSpec where
import Data.Aeson
import Category as C
import Command
import Data.Functor.Both
import Data.Functor.Listable
import Data.RandomWalkSimilarity
import Data.Record
import Data.String
import Diff
import DiffCommand
import Info
import Interpreter
import ParseCommand
import Patch
import Prologue hiding (fst, snd)
import Renderer
@ -118,14 +117,14 @@ spec = parallel $ do
output <- diffOutput sourceBlobs
output `shouldBe` "{\"changes\":{},\"errors\":{\"ruby/methods.A.rb -> ruby/methods.X.rb\":[{\"span\":{\"start\":[1,1],\"end\":[3,1]},\"error\":\"def bar\\nen\\n\"}]}}"
type Diff' = SyntaxDiff String '[Range, Category, SourceSpan]
type Term' = SyntaxTerm String '[Range, Category, SourceSpan]
type Diff' = SyntaxDiff String DefaultFields
type Term' = SyntaxTerm String DefaultFields
diffOutput :: Both SourceBlob -> IO ByteString
diffOutput sourceBlobs = do
let parser = parserForFilepath (path (fst sourceBlobs))
diff <- diffFiles parser sourceBlobs
pure $ concatOutputs [toc sourceBlobs diff]
diffOutput blobs = runCommand $ do
terms <- for blobs parseBlob
diff' <- diff terms
toS . encode <$> renderDiffs ToCRenderer [ (blobs, diff') ]
numTocSummaries :: Diff' -> Int
numTocSummaries diff = Prologue.length $ filter (not . isErrorSummary) (diffTOC blankDiffBlobs diff)
@ -162,14 +161,14 @@ functionOf name body = cofree $ functionInfo :< S.Function name' [] [body]
where
name' = cofree $ (Range 0 0 :. C.Identifier :. sourceSpanBetween (0,0) (0,0) :. Nil) :< Leaf name
programInfo :: Record '[Range, Category, SourceSpan]
programInfo :: Record DefaultFields
programInfo = Range 0 0 :. C.Program :. sourceSpanBetween (0,0) (0,0) :. Nil
functionInfo :: Record '[Range, Category, SourceSpan]
functionInfo :: Record DefaultFields
functionInfo = Range 0 0 :. C.Function :. sourceSpanBetween (0,0) (0,0) :. Nil
-- Filter tiers for terms that we consider "meaniningful" in TOC summaries.
isMeaningfulTerm :: ListableF (Term (Syntax leaf)) (Record '[Range, Category, SourceSpan]) -> Bool
isMeaningfulTerm :: ListableF (Term (Syntax leaf)) (Record DefaultFields) -> Bool
isMeaningfulTerm a = case runCofree (unListableF a) of
(_ :< S.Indexed _) -> False
(_ :< S.Fixed _) -> False
@ -178,7 +177,7 @@ isMeaningfulTerm a = case runCofree (unListableF a) of
_ -> True
-- Filter tiers for terms if the Syntax is a Method or a Function.
isMethodOrFunction :: ListableF (Term (Syntax leaf)) (Record '[Range, Category, SourceSpan]) -> Bool
isMethodOrFunction :: ListableF (Term (Syntax leaf)) (Record DefaultFields) -> Bool
isMethodOrFunction a = case runCofree (unListableF a) of
(_ :< S.Method{}) -> True
(_ :< S.Function{}) -> True
@ -187,10 +186,10 @@ isMethodOrFunction a = case runCofree (unListableF a) of
(a :< _) | getField a == C.SingletonMethod -> True
_ -> False
testDiff :: Both SourceBlob -> IO (Diff (Syntax Text) (Record '[Range, Category, SourceSpan]))
testDiff sourceBlobs = diffFiles parser sourceBlobs
where
parser = parserForFilepath (path . fst $ sourceBlobs)
testDiff :: Both SourceBlob -> IO (Diff (Syntax Text) (Record DefaultFields))
testDiff blobs = runCommand $ do
terms <- for blobs parseBlob
diff terms
blobsForPaths :: Both FilePath -> IO (Both SourceBlob)
blobsForPaths paths = do

@ -1 +1 @@
Subproject commit be0b929a70453175af218870cb51e70575581c77
Subproject commit e76073e4c6ccb75af73f86ac9fe5fc5496e4d44f