1
1
mirror of https://github.com/github/semantic.git synced 2024-12-26 00:12:29 +03:00

Start to make a clean delineation about command function that use git or the fs

This commit is contained in:
Timothy Clem 2017-04-19 09:45:08 -07:00
parent eb9547fb25
commit 18e81adb14
9 changed files with 172 additions and 141 deletions

View File

@ -18,6 +18,8 @@ library
, Arguments , Arguments
, Category , Category
, Command , Command
, Command.Files
, Command.Git
, Command.Parse , Command.Parse
, Data.Align.Generic , Data.Align.Generic
, Data.Functor.Both , Data.Functor.Both
@ -50,6 +52,7 @@ library
, Language.Ruby , Language.Ruby
, Language.Ruby.Syntax , Language.Ruby.Syntax
, Parser , Parser
, Parser.Language
, Patch , Patch
, Paths_semantic_diff , Paths_semantic_diff
, Prologue , Prologue

View File

@ -23,6 +23,7 @@ module Command
, runCommand , runCommand
) where ) where
import Command.Files
import Command.Parse import Command.Parse
import qualified Control.Concurrent.Async.Pool as Async import qualified Control.Concurrent.Async.Pool as Async
import Control.Exception (catch) import Control.Exception (catch)
@ -51,6 +52,7 @@ import Info
import Interpreter import Interpreter
import Language import Language
import Patch import Patch
import Parser.Language
import Prologue hiding (concurrently, Concurrently, readFile) import Prologue hiding (concurrently, Concurrently, readFile)
import qualified Renderer as R import qualified Renderer as R
import qualified Renderer.SExpression as R import qualified Renderer.SExpression as R

46
src/Command/Files.hs Normal file
View File

@ -0,0 +1,46 @@
module Command.Files
( sourceBlobsFromPaths
, transcode
) where
import Prologue
import Source
import qualified Data.ByteString as B
import System.IO
import Control.Exception (catch, IOException)
import qualified Data.Text.ICU.Convert as Convert
import qualified Data.Text.ICU.Detect as Detect
-- | For the given absolute file paths, retrieves their source blobs.
sourceBlobsFromPaths :: [FilePath] -> IO [SourceBlob]
sourceBlobsFromPaths filePaths =
for filePaths (\filePath -> do
source <- readAndTranscodeFile filePath
pure $ Source.SourceBlob source mempty filePath (Just Source.defaultPlainBlob))
-- | Read the file and convert it to Unicode.
readAndTranscodeFile :: FilePath -> IO Source
readAndTranscodeFile path = do
size <- fileSize path
text <- case size of
0 -> pure B.empty
_ -> B.readFile path
transcode text
-- Based on https://github.com/haskell/bytestring/pull/79/files
-- Neccessary to be able to handle /dev/null as a file.
fileSize :: FilePath -> IO Integer
fileSize f = withBinaryFile f ReadMode $ \h -> do
-- hFileSize fails if file is not regular file (like /dev/null). Catch
-- exception and return 0 in that case.
filesz <- catch (hFileSize h) useZeroIfNotRegularFile
pure $ fromIntegral filesz `max` 0
where useZeroIfNotRegularFile :: IOException -> IO Integer
useZeroIfNotRegularFile _ = pure 0
-- | Transcode a file to a unicode source.
transcode :: B.ByteString -> IO Source
transcode text = fromText <$> do
match <- Detect.detectCharset text
converter <- Convert.open match Nothing
pure $ Convert.toUnicode converter text

41
src/Command/Git.hs Normal file
View File

@ -0,0 +1,41 @@
module Command.Git (sourceBlobsFromSha) where
import Prologue
import Source
import Command.Files
import Data.String
import Git.Blob
import Git.Libgit2
import Git.Repository
import Git.Types
import qualified Git
-- | For the given sha, git repo path, and file paths, retrieves the source blobs.
sourceBlobsFromSha :: String -> String -> [FilePath] -> IO [SourceBlob]
sourceBlobsFromSha commitSha gitDir filePaths = do
maybeBlobs <- withRepository lgFactory gitDir $ do
repo <- getRepository
object <- parseObjOid (toS commitSha)
commit <- lookupCommit object
tree <- lookupTree (commitTree commit)
lift $ runReaderT (traverse (toSourceBlob tree) filePaths) repo
pure $ catMaybes maybeBlobs
where
toSourceBlob :: Git.Tree LgRepo -> FilePath -> ReaderT LgRepo IO (Maybe SourceBlob)
toSourceBlob tree filePath = do
entry <- treeEntry tree (toS filePath)
case entry of
Just (BlobEntry entryOid entryKind) -> do
blob <- lookupBlob entryOid
bytestring <- blobToByteString blob
let oid = renderObjOid $ blobOid blob
s <- liftIO $ transcode bytestring
pure . Just $ SourceBlob s (toS oid) filePath (Just (toSourceKind entryKind))
_ -> pure Nothing
where
toSourceKind :: Git.BlobKind -> SourceKind
toSourceKind (Git.PlainBlob mode) = Source.PlainBlob mode
toSourceKind (Git.ExecutableBlob mode) = Source.ExecutableBlob mode
toSourceKind (Git.SymlinkBlob mode) = Source.SymlinkBlob mode

View File

@ -1,18 +1,8 @@
{-# LANGUAGE DataKinds, GADTs, GeneralizedNewtypeDeriving, ScopedTypeVariables, TypeFamilies, TypeOperators #-} {-# LANGUAGE DataKinds, GADTs, GeneralizedNewtypeDeriving, ScopedTypeVariables, TypeFamilies, TypeOperators #-}
module Command.Parse module Command.Parse
( ( jsonParseTree
-- Render parser trees
jsonParseTree
, jsonIndexParseTree , jsonIndexParseTree
, sExpressionParseTree , sExpressionParseTree
-- Parsers
, parserForLanguage
, parserForFilePath -- TODO: Remove. Langauge detection should not come from file extension.
-- Files and transcoding
, transcode
, sourceBlobsFromPaths
-- Git
, sourceBlobsFromSha
) where ) where
import Category import Category
@ -20,71 +10,15 @@ import Data.Aeson (ToJSON, toJSON, encode, object, (.=))
import Data.Aeson.Types (Pair) import Data.Aeson.Types (Pair)
import Data.Functor.Foldable hiding (Nil) import Data.Functor.Foldable hiding (Nil)
import Data.Record import Data.Record
import Data.String
import qualified Data.ByteString as B
import qualified Data.Text as T
import Git.Blob
import Git.Libgit2
import Git.Repository
import Git.Types
import qualified Git
import Info import Info
import Language
import Language.Markdown
import Parser import Parser
import Parser.Language
import Prologue import Prologue
import Source import Source
import Syntax import Syntax
import System.FilePath
import Term import Term
import TreeSitter
import Renderer.JSON() import Renderer.JSON()
import Renderer.SExpression import Renderer.SExpression
import Text.Parser.TreeSitter.C
import Text.Parser.TreeSitter.Go
import Text.Parser.TreeSitter.JavaScript
import Text.Parser.TreeSitter.Ruby
import Text.Parser.TreeSitter.TypeScript
import System.IO
import Control.Exception (catch, IOException)
import qualified Data.Text.ICU.Convert as Convert
import qualified Data.Text.ICU.Detect as Detect
-- TODO: This doesn't belong in here
-- | Return a parser for a given langauge or the lineByLineParser parser.
parserForLanguage :: Maybe Language -> Parser (Syntax Text) (Record DefaultFields)
parserForLanguage Nothing = lineByLineParser
parserForLanguage (Just language) = case language of
C -> treeSitterParser C tree_sitter_c
JavaScript -> treeSitterParser JavaScript tree_sitter_javascript
TypeScript -> treeSitterParser TypeScript tree_sitter_typescript
Markdown -> cmarkParser
Ruby -> treeSitterParser Ruby tree_sitter_ruby
Language.Go -> treeSitterParser Language.Go tree_sitter_go
-- | Return a parser based on the file extension (including the ".").
-- | TODO: Remove.
parserForType :: String -> Parser (Syntax Text) (Record DefaultFields)
parserForType = parserForLanguage . languageForType
-- | Return the parser that should be used for a given path.
-- | TODO: Remove.
parserForFilePath :: FilePath -> Parser (Syntax Text) (Record DefaultFields)
parserForFilePath = parserForType . toS . takeExtension
-- | A fallback parser that treats a file simply as rows of strings.
lineByLineParser :: Parser (Syntax Text) (Record DefaultFields)
lineByLineParser SourceBlob{..} = pure . cofree . root $ case foldl' annotateLeaves ([], 0) lines of
(leaves, _) -> cofree <$> leaves
where
lines = actualLines source
root children = (sourceRange :. Program :. rangeToSourceSpan source sourceRange :. Nil) :< Indexed children
sourceRange = Source.totalRange source
leaf charIndex line = (Range charIndex (charIndex + T.length line) :. Program :. rangeToSourceSpan source (Range charIndex (charIndex + T.length line)) :. Nil) :< Leaf line
annotateLeaves (accum, charIndex) line =
(accum <> [ leaf charIndex (Source.toText line) ] , charIndex + Source.length line)
data ParseTreeFile = ParseTreeFile { parseTreeFilePath :: FilePath, node :: Rose ParseNode } deriving (Show) data ParseTreeFile = ParseTreeFile { parseTreeFilePath :: FilePath, node :: Rose ParseNode } deriving (Show)
@ -119,6 +53,15 @@ parseNodeToJSONFields ParseNode{..} =
<> [ "sourceText" .= sourceText | isJust sourceText ] <> [ "sourceText" .= sourceText | isJust sourceText ]
<> [ "identifier" .= identifier | isJust identifier ] <> [ "identifier" .= identifier | isJust identifier ]
-- | Constructs ParseTreeFile nodes for the provided arguments and encodes them to JSON.
jsonParseTree :: Bool -> [SourceBlob] -> IO ByteString
jsonParseTree debug = fmap (toS . encode) . parseRoot debug ParseTreeFile Rose
-- | Constructs IndexFile nodes for the provided arguments and encodes them to JSON.
jsonIndexParseTree :: Bool -> [SourceBlob] -> IO ByteString
jsonIndexParseTree debug = fmap (toS . encode) . parseRoot debug IndexFile (\ node siblings -> node : concat siblings)
-- | Parses file contents into an SExpression format for the provided arguments. -- | Parses file contents into an SExpression format for the provided arguments.
sExpressionParseTree :: Bool -> [SourceBlob] -> IO ByteString sExpressionParseTree :: Bool -> [SourceBlob] -> IO ByteString
sExpressionParseTree _ blobs = sExpressionParseTree _ blobs =
@ -135,56 +78,11 @@ parseRoot debug construct combine blobs = for blobs (\ sourceBlob@SourceBlob{..}
makeNode (head :. range :. category :. sourceSpan :. Nil) syntax = makeNode (head :. range :. category :. sourceSpan :. Nil) syntax =
ParseNode (toS category) range head sourceSpan (identifierFor syntax) ParseNode (toS category) range head sourceSpan (identifierFor syntax)
-- | Constructs IndexFile nodes for the provided arguments and encodes them to JSON.
jsonIndexParseTree :: Bool -> [SourceBlob] -> IO ByteString
jsonIndexParseTree debug = fmap (toS . encode) . parseRoot debug IndexFile (\ node siblings -> node : concat siblings)
-- | Constructs ParseTreeFile nodes for the provided arguments and encodes them to JSON.
jsonParseTree :: Bool -> [SourceBlob] -> IO ByteString
jsonParseTree debug = fmap (toS . encode) . parseRoot debug ParseTreeFile Rose
-- | Determines the term decorator to use when parsing. -- | Determines the term decorator to use when parsing.
parseDecorator :: (Functor f, HasField fields Range) => Bool -> (Source -> TermDecorator f fields (Maybe SourceText)) parseDecorator :: (Functor f, HasField fields Range) => Bool -> (Source -> TermDecorator f fields (Maybe SourceText))
parseDecorator True = termSourceTextDecorator parseDecorator True = termSourceTextDecorator
parseDecorator False = const . const Nothing parseDecorator False = const . const Nothing
-- | For the given absolute file paths, retrieves their source blobs.
sourceBlobsFromPaths :: [FilePath] -> IO [SourceBlob]
sourceBlobsFromPaths filePaths =
for filePaths (\filePath -> do
source <- readAndTranscodeFile filePath
pure $ Source.SourceBlob source mempty filePath (Just Source.defaultPlainBlob))
-- | For the given sha, git repo path, and file paths, retrieves the source blobs.
sourceBlobsFromSha :: String -> String -> [FilePath] -> IO [SourceBlob]
sourceBlobsFromSha commitSha gitDir filePaths = do
maybeBlobs <- withRepository lgFactory gitDir $ do
repo <- getRepository
object <- parseObjOid (toS commitSha)
commit <- lookupCommit object
tree <- lookupTree (commitTree commit)
lift $ runReaderT (traverse (toSourceBlob tree) filePaths) repo
pure $ catMaybes maybeBlobs
where
toSourceBlob :: Git.Tree LgRepo -> FilePath -> ReaderT LgRepo IO (Maybe SourceBlob)
toSourceBlob tree filePath = do
entry <- treeEntry tree (toS filePath)
case entry of
Just (BlobEntry entryOid entryKind) -> do
blob <- lookupBlob entryOid
bytestring <- blobToByteString blob
let oid = renderObjOid $ blobOid blob
s <- liftIO $ transcode bytestring
pure . Just $ SourceBlob s (toS oid) filePath (Just (toSourceKind entryKind))
_ -> pure Nothing
where
toSourceKind :: Git.BlobKind -> SourceKind
toSourceKind (Git.PlainBlob mode) = Source.PlainBlob mode
toSourceKind (Git.ExecutableBlob mode) = Source.ExecutableBlob mode
toSourceKind (Git.SymlinkBlob mode) = Source.SymlinkBlob mode
-- | Returns a Just identifier text if the given Syntax term contains an identifier (leaf) syntax. Otherwise returns Nothing. -- | Returns a Just identifier text if the given Syntax term contains an identifier (leaf) syntax. Otherwise returns Nothing.
identifierFor :: (HasField fields (Maybe SourceText), HasField fields Category, StringConv leaf Text) => Syntax leaf (Term (Syntax leaf) (Record fields)) -> Maybe Text identifierFor :: (HasField fields (Maybe SourceText), HasField fields Category, StringConv leaf Text) => Syntax leaf (Term (Syntax leaf) (Record fields)) -> Maybe Text
identifierFor = fmap toS . extractLeafValue . unwrap <=< maybeIdentifier identifierFor = fmap toS . extractLeafValue . unwrap <=< maybeIdentifier
@ -207,34 +105,6 @@ termSourceTextDecorator source (ann :< _) = Just (SourceText (toText (Source.sli
newtype Identifier = Identifier Text newtype Identifier = Identifier Text
deriving (Eq, Show, ToJSON) deriving (Eq, Show, ToJSON)
-- | Read the file and convert it to Unicode.
readAndTranscodeFile :: FilePath -> IO Source
readAndTranscodeFile path = do
size <- fileSize path
text <- case size of
0 -> pure B.empty
_ -> B.readFile path
transcode text
-- Based on https://github.com/haskell/bytestring/pull/79/files
-- Neccessary to be able to handle /dev/null as a file.
fileSize :: FilePath -> IO Integer
fileSize f = withBinaryFile f ReadMode $ \h -> do
-- hFileSize fails if file is not regular file (like /dev/null). Catch
-- exception and return 0 in that case.
filesz <- catch (hFileSize h) useZeroIfNotRegularFile
pure $ fromIntegral filesz `max` 0
where useZeroIfNotRegularFile :: IOException -> IO Integer
useZeroIfNotRegularFile _ = pure 0
-- | Transcode a file to a unicode source.
transcode :: B.ByteString -> IO Source
transcode text = fromText <$> do
match <- Detect.detectCharset text
converter <- Convert.open match Nothing
pure $ Convert.toUnicode converter text
data RoseF a b = RoseF a [b] data RoseF a b = RoseF a [b]
deriving (Eq, Functor, Show) deriving (Eq, Functor, Show)

51
src/Parser/Language.hs Normal file
View File

@ -0,0 +1,51 @@
module Parser.Language
( parserForLanguage
, parserForFilePath -- TODO: Remove. Langauge detection should not come from file extension.
) where
import Data.Record
import qualified Data.Text as T
import Parser
import Source
import Prologue
import Info
import Language
import Language.Markdown
import Syntax
import TreeSitter
import Text.Parser.TreeSitter.C
import Text.Parser.TreeSitter.Go
import Text.Parser.TreeSitter.JavaScript
import Text.Parser.TreeSitter.Ruby
import Text.Parser.TreeSitter.TypeScript
-- TODO: Shouldn't need to depend on System.FilePath in here
import System.FilePath
-- | Return a parser for a given langauge or the lineByLineParser parser.
parserForLanguage :: Maybe Language -> Parser (Syntax Text) (Record DefaultFields)
parserForLanguage Nothing = lineByLineParser
parserForLanguage (Just language) = case language of
C -> treeSitterParser C tree_sitter_c
JavaScript -> treeSitterParser JavaScript tree_sitter_javascript
TypeScript -> treeSitterParser TypeScript tree_sitter_typescript
Markdown -> cmarkParser
Ruby -> treeSitterParser Ruby tree_sitter_ruby
Language.Go -> treeSitterParser Language.Go tree_sitter_go
-- | Return a parser based on the FilePath's extension (including the ".").
-- | TODO: Remove.
parserForFilePath :: FilePath -> Parser (Syntax Text) (Record DefaultFields)
parserForFilePath = parserForLanguage . languageForType . toS . takeExtension
-- | A fallback parser that treats a file simply as rows of strings.
lineByLineParser :: Parser (Syntax Text) (Record DefaultFields)
lineByLineParser SourceBlob{..} = pure . cofree . root $ case foldl' annotateLeaves ([], 0) lines of
(leaves, _) -> cofree <$> leaves
where
lines = actualLines source
root children = (sourceRange :. Program :. rangeToSourceSpan source sourceRange :. Nil) :< Indexed children
sourceRange = Source.totalRange source
leaf charIndex line = (Range charIndex (charIndex + T.length line) :. Program :. rangeToSourceSpan source (Range charIndex (charIndex + T.length line)) :. Nil) :< Leaf line
annotateLeaves (accum, charIndex) line =
(accum <> [ leaf charIndex (Source.toText line) ] , charIndex + Source.length line)

View File

@ -3,6 +3,8 @@ module Semantic (main, runDiff, runParse) where
import Arguments import Arguments
import Command import Command
import Command.Files
import Command.Git
import Command.Parse import Command.Parse
import Data.Functor.Both import Data.Functor.Both
import Data.List.Split (splitWhen) import Data.List.Split (splitWhen)

View File

@ -1,5 +1,6 @@
module Command.Parse.Spec where module Command.Parse.Spec where
import Command.Files
import Command.Parse import Command.Parse
import Control.Monad import Control.Monad
import Prelude import Prelude

View File

@ -5,7 +5,15 @@ import qualified Data.ByteString as B
import qualified Data.Text.ICU.Convert as Convert import qualified Data.Text.ICU.Convert as Convert
import qualified Data.Text.ICU.Detect as Detect import qualified Data.Text.ICU.Detect as Detect
import Source import Source
import Info
import Data.Record
import Syntax
import Language
import Parser
import Parser.Language
import System.FilePath
-- TODO: Write helper functions for parse file and diff files that don't depend on Command.
-- | Read a file and convert it to Unicode. -- | Read a file and convert it to Unicode.
readFileToUnicode :: FilePath -> IO Source readFileToUnicode :: FilePath -> IO Source
@ -17,3 +25,10 @@ readFileToUnicode path = B.readFile path >>= transcode
match <- Detect.detectCharset text match <- Detect.detectCharset text
converter <- Convert.open match Nothing converter <- Convert.open match Nothing
pure $ Convert.toUnicode converter text pure $ Convert.toUnicode converter text
-- | Return a parser based on the FilePath's extension (including the "."). |
-- NB: This is intentionally duplicated from Parser.Language because our tests
-- will always need to be able to select language from file extention whereas
-- the semantic project should eventually depend on exernal language detection.
parserForFilePath :: FilePath -> Parser (Syntax Text) (Record DefaultFields)
parserForFilePath = parserForLanguage . languageForType . toS . takeExtension