mirror of
https://github.com/github/semantic.git
synced 2024-12-25 16:02:43 +03:00
Start to make a clean delineation about command function that use git or the fs
This commit is contained in:
parent
eb9547fb25
commit
18e81adb14
@ -18,6 +18,8 @@ library
|
||||
, Arguments
|
||||
, Category
|
||||
, Command
|
||||
, Command.Files
|
||||
, Command.Git
|
||||
, Command.Parse
|
||||
, Data.Align.Generic
|
||||
, Data.Functor.Both
|
||||
@ -50,6 +52,7 @@ library
|
||||
, Language.Ruby
|
||||
, Language.Ruby.Syntax
|
||||
, Parser
|
||||
, Parser.Language
|
||||
, Patch
|
||||
, Paths_semantic_diff
|
||||
, Prologue
|
||||
|
@ -23,6 +23,7 @@ module Command
|
||||
, runCommand
|
||||
) where
|
||||
|
||||
import Command.Files
|
||||
import Command.Parse
|
||||
import qualified Control.Concurrent.Async.Pool as Async
|
||||
import Control.Exception (catch)
|
||||
@ -51,6 +52,7 @@ import Info
|
||||
import Interpreter
|
||||
import Language
|
||||
import Patch
|
||||
import Parser.Language
|
||||
import Prologue hiding (concurrently, Concurrently, readFile)
|
||||
import qualified Renderer as R
|
||||
import qualified Renderer.SExpression as R
|
||||
|
46
src/Command/Files.hs
Normal file
46
src/Command/Files.hs
Normal file
@ -0,0 +1,46 @@
|
||||
module Command.Files
|
||||
( sourceBlobsFromPaths
|
||||
, transcode
|
||||
) where
|
||||
|
||||
import Prologue
|
||||
import Source
|
||||
import qualified Data.ByteString as B
|
||||
import System.IO
|
||||
import Control.Exception (catch, IOException)
|
||||
import qualified Data.Text.ICU.Convert as Convert
|
||||
import qualified Data.Text.ICU.Detect as Detect
|
||||
|
||||
-- | For the given absolute file paths, retrieves their source blobs.
|
||||
sourceBlobsFromPaths :: [FilePath] -> IO [SourceBlob]
|
||||
sourceBlobsFromPaths filePaths =
|
||||
for filePaths (\filePath -> do
|
||||
source <- readAndTranscodeFile filePath
|
||||
pure $ Source.SourceBlob source mempty filePath (Just Source.defaultPlainBlob))
|
||||
|
||||
-- | Read the file and convert it to Unicode.
|
||||
readAndTranscodeFile :: FilePath -> IO Source
|
||||
readAndTranscodeFile path = do
|
||||
size <- fileSize path
|
||||
text <- case size of
|
||||
0 -> pure B.empty
|
||||
_ -> B.readFile path
|
||||
transcode text
|
||||
|
||||
-- Based on https://github.com/haskell/bytestring/pull/79/files
|
||||
-- Neccessary to be able to handle /dev/null as a file.
|
||||
fileSize :: FilePath -> IO Integer
|
||||
fileSize f = withBinaryFile f ReadMode $ \h -> do
|
||||
-- hFileSize fails if file is not regular file (like /dev/null). Catch
|
||||
-- exception and return 0 in that case.
|
||||
filesz <- catch (hFileSize h) useZeroIfNotRegularFile
|
||||
pure $ fromIntegral filesz `max` 0
|
||||
where useZeroIfNotRegularFile :: IOException -> IO Integer
|
||||
useZeroIfNotRegularFile _ = pure 0
|
||||
|
||||
-- | Transcode a file to a unicode source.
|
||||
transcode :: B.ByteString -> IO Source
|
||||
transcode text = fromText <$> do
|
||||
match <- Detect.detectCharset text
|
||||
converter <- Convert.open match Nothing
|
||||
pure $ Convert.toUnicode converter text
|
41
src/Command/Git.hs
Normal file
41
src/Command/Git.hs
Normal file
@ -0,0 +1,41 @@
|
||||
module Command.Git (sourceBlobsFromSha) where
|
||||
|
||||
import Prologue
|
||||
import Source
|
||||
import Command.Files
|
||||
import Data.String
|
||||
import Git.Blob
|
||||
import Git.Libgit2
|
||||
import Git.Repository
|
||||
import Git.Types
|
||||
import qualified Git
|
||||
|
||||
-- | For the given sha, git repo path, and file paths, retrieves the source blobs.
|
||||
sourceBlobsFromSha :: String -> String -> [FilePath] -> IO [SourceBlob]
|
||||
sourceBlobsFromSha commitSha gitDir filePaths = do
|
||||
maybeBlobs <- withRepository lgFactory gitDir $ do
|
||||
repo <- getRepository
|
||||
object <- parseObjOid (toS commitSha)
|
||||
commit <- lookupCommit object
|
||||
tree <- lookupTree (commitTree commit)
|
||||
lift $ runReaderT (traverse (toSourceBlob tree) filePaths) repo
|
||||
|
||||
pure $ catMaybes maybeBlobs
|
||||
|
||||
where
|
||||
toSourceBlob :: Git.Tree LgRepo -> FilePath -> ReaderT LgRepo IO (Maybe SourceBlob)
|
||||
toSourceBlob tree filePath = do
|
||||
entry <- treeEntry tree (toS filePath)
|
||||
case entry of
|
||||
Just (BlobEntry entryOid entryKind) -> do
|
||||
blob <- lookupBlob entryOid
|
||||
bytestring <- blobToByteString blob
|
||||
let oid = renderObjOid $ blobOid blob
|
||||
s <- liftIO $ transcode bytestring
|
||||
pure . Just $ SourceBlob s (toS oid) filePath (Just (toSourceKind entryKind))
|
||||
_ -> pure Nothing
|
||||
where
|
||||
toSourceKind :: Git.BlobKind -> SourceKind
|
||||
toSourceKind (Git.PlainBlob mode) = Source.PlainBlob mode
|
||||
toSourceKind (Git.ExecutableBlob mode) = Source.ExecutableBlob mode
|
||||
toSourceKind (Git.SymlinkBlob mode) = Source.SymlinkBlob mode
|
@ -1,18 +1,8 @@
|
||||
{-# LANGUAGE DataKinds, GADTs, GeneralizedNewtypeDeriving, ScopedTypeVariables, TypeFamilies, TypeOperators #-}
|
||||
module Command.Parse
|
||||
(
|
||||
-- Render parser trees
|
||||
jsonParseTree
|
||||
( jsonParseTree
|
||||
, jsonIndexParseTree
|
||||
, sExpressionParseTree
|
||||
-- Parsers
|
||||
, parserForLanguage
|
||||
, parserForFilePath -- TODO: Remove. Langauge detection should not come from file extension.
|
||||
-- Files and transcoding
|
||||
, transcode
|
||||
, sourceBlobsFromPaths
|
||||
-- Git
|
||||
, sourceBlobsFromSha
|
||||
) where
|
||||
|
||||
import Category
|
||||
@ -20,71 +10,15 @@ import Data.Aeson (ToJSON, toJSON, encode, object, (.=))
|
||||
import Data.Aeson.Types (Pair)
|
||||
import Data.Functor.Foldable hiding (Nil)
|
||||
import Data.Record
|
||||
import Data.String
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.Text as T
|
||||
import Git.Blob
|
||||
import Git.Libgit2
|
||||
import Git.Repository
|
||||
import Git.Types
|
||||
import qualified Git
|
||||
import Info
|
||||
import Language
|
||||
import Language.Markdown
|
||||
import Parser
|
||||
import Parser.Language
|
||||
import Prologue
|
||||
import Source
|
||||
import Syntax
|
||||
import System.FilePath
|
||||
import Term
|
||||
import TreeSitter
|
||||
import Renderer.JSON()
|
||||
import Renderer.SExpression
|
||||
import Text.Parser.TreeSitter.C
|
||||
import Text.Parser.TreeSitter.Go
|
||||
import Text.Parser.TreeSitter.JavaScript
|
||||
import Text.Parser.TreeSitter.Ruby
|
||||
import Text.Parser.TreeSitter.TypeScript
|
||||
import System.IO
|
||||
import Control.Exception (catch, IOException)
|
||||
import qualified Data.Text.ICU.Convert as Convert
|
||||
import qualified Data.Text.ICU.Detect as Detect
|
||||
|
||||
-- TODO: This doesn't belong in here
|
||||
|
||||
-- | Return a parser for a given langauge or the lineByLineParser parser.
|
||||
parserForLanguage :: Maybe Language -> Parser (Syntax Text) (Record DefaultFields)
|
||||
parserForLanguage Nothing = lineByLineParser
|
||||
parserForLanguage (Just language) = case language of
|
||||
C -> treeSitterParser C tree_sitter_c
|
||||
JavaScript -> treeSitterParser JavaScript tree_sitter_javascript
|
||||
TypeScript -> treeSitterParser TypeScript tree_sitter_typescript
|
||||
Markdown -> cmarkParser
|
||||
Ruby -> treeSitterParser Ruby tree_sitter_ruby
|
||||
Language.Go -> treeSitterParser Language.Go tree_sitter_go
|
||||
|
||||
-- | Return a parser based on the file extension (including the ".").
|
||||
-- | TODO: Remove.
|
||||
parserForType :: String -> Parser (Syntax Text) (Record DefaultFields)
|
||||
parserForType = parserForLanguage . languageForType
|
||||
|
||||
-- | Return the parser that should be used for a given path.
|
||||
-- | TODO: Remove.
|
||||
parserForFilePath :: FilePath -> Parser (Syntax Text) (Record DefaultFields)
|
||||
parserForFilePath = parserForType . toS . takeExtension
|
||||
|
||||
-- | A fallback parser that treats a file simply as rows of strings.
|
||||
lineByLineParser :: Parser (Syntax Text) (Record DefaultFields)
|
||||
lineByLineParser SourceBlob{..} = pure . cofree . root $ case foldl' annotateLeaves ([], 0) lines of
|
||||
(leaves, _) -> cofree <$> leaves
|
||||
where
|
||||
lines = actualLines source
|
||||
root children = (sourceRange :. Program :. rangeToSourceSpan source sourceRange :. Nil) :< Indexed children
|
||||
sourceRange = Source.totalRange source
|
||||
leaf charIndex line = (Range charIndex (charIndex + T.length line) :. Program :. rangeToSourceSpan source (Range charIndex (charIndex + T.length line)) :. Nil) :< Leaf line
|
||||
annotateLeaves (accum, charIndex) line =
|
||||
(accum <> [ leaf charIndex (Source.toText line) ] , charIndex + Source.length line)
|
||||
|
||||
|
||||
|
||||
data ParseTreeFile = ParseTreeFile { parseTreeFilePath :: FilePath, node :: Rose ParseNode } deriving (Show)
|
||||
@ -119,6 +53,15 @@ parseNodeToJSONFields ParseNode{..} =
|
||||
<> [ "sourceText" .= sourceText | isJust sourceText ]
|
||||
<> [ "identifier" .= identifier | isJust identifier ]
|
||||
|
||||
|
||||
-- | Constructs ParseTreeFile nodes for the provided arguments and encodes them to JSON.
|
||||
jsonParseTree :: Bool -> [SourceBlob] -> IO ByteString
|
||||
jsonParseTree debug = fmap (toS . encode) . parseRoot debug ParseTreeFile Rose
|
||||
|
||||
-- | Constructs IndexFile nodes for the provided arguments and encodes them to JSON.
|
||||
jsonIndexParseTree :: Bool -> [SourceBlob] -> IO ByteString
|
||||
jsonIndexParseTree debug = fmap (toS . encode) . parseRoot debug IndexFile (\ node siblings -> node : concat siblings)
|
||||
|
||||
-- | Parses file contents into an SExpression format for the provided arguments.
|
||||
sExpressionParseTree :: Bool -> [SourceBlob] -> IO ByteString
|
||||
sExpressionParseTree _ blobs =
|
||||
@ -135,56 +78,11 @@ parseRoot debug construct combine blobs = for blobs (\ sourceBlob@SourceBlob{..}
|
||||
makeNode (head :. range :. category :. sourceSpan :. Nil) syntax =
|
||||
ParseNode (toS category) range head sourceSpan (identifierFor syntax)
|
||||
|
||||
-- | Constructs IndexFile nodes for the provided arguments and encodes them to JSON.
|
||||
jsonIndexParseTree :: Bool -> [SourceBlob] -> IO ByteString
|
||||
jsonIndexParseTree debug = fmap (toS . encode) . parseRoot debug IndexFile (\ node siblings -> node : concat siblings)
|
||||
|
||||
-- | Constructs ParseTreeFile nodes for the provided arguments and encodes them to JSON.
|
||||
jsonParseTree :: Bool -> [SourceBlob] -> IO ByteString
|
||||
jsonParseTree debug = fmap (toS . encode) . parseRoot debug ParseTreeFile Rose
|
||||
|
||||
-- | Determines the term decorator to use when parsing.
|
||||
parseDecorator :: (Functor f, HasField fields Range) => Bool -> (Source -> TermDecorator f fields (Maybe SourceText))
|
||||
parseDecorator True = termSourceTextDecorator
|
||||
parseDecorator False = const . const Nothing
|
||||
|
||||
-- | For the given absolute file paths, retrieves their source blobs.
|
||||
sourceBlobsFromPaths :: [FilePath] -> IO [SourceBlob]
|
||||
sourceBlobsFromPaths filePaths =
|
||||
for filePaths (\filePath -> do
|
||||
source <- readAndTranscodeFile filePath
|
||||
pure $ Source.SourceBlob source mempty filePath (Just Source.defaultPlainBlob))
|
||||
|
||||
-- | For the given sha, git repo path, and file paths, retrieves the source blobs.
|
||||
sourceBlobsFromSha :: String -> String -> [FilePath] -> IO [SourceBlob]
|
||||
sourceBlobsFromSha commitSha gitDir filePaths = do
|
||||
maybeBlobs <- withRepository lgFactory gitDir $ do
|
||||
repo <- getRepository
|
||||
object <- parseObjOid (toS commitSha)
|
||||
commit <- lookupCommit object
|
||||
tree <- lookupTree (commitTree commit)
|
||||
lift $ runReaderT (traverse (toSourceBlob tree) filePaths) repo
|
||||
|
||||
pure $ catMaybes maybeBlobs
|
||||
|
||||
where
|
||||
toSourceBlob :: Git.Tree LgRepo -> FilePath -> ReaderT LgRepo IO (Maybe SourceBlob)
|
||||
toSourceBlob tree filePath = do
|
||||
entry <- treeEntry tree (toS filePath)
|
||||
case entry of
|
||||
Just (BlobEntry entryOid entryKind) -> do
|
||||
blob <- lookupBlob entryOid
|
||||
bytestring <- blobToByteString blob
|
||||
let oid = renderObjOid $ blobOid blob
|
||||
s <- liftIO $ transcode bytestring
|
||||
pure . Just $ SourceBlob s (toS oid) filePath (Just (toSourceKind entryKind))
|
||||
_ -> pure Nothing
|
||||
where
|
||||
toSourceKind :: Git.BlobKind -> SourceKind
|
||||
toSourceKind (Git.PlainBlob mode) = Source.PlainBlob mode
|
||||
toSourceKind (Git.ExecutableBlob mode) = Source.ExecutableBlob mode
|
||||
toSourceKind (Git.SymlinkBlob mode) = Source.SymlinkBlob mode
|
||||
|
||||
-- | Returns a Just identifier text if the given Syntax term contains an identifier (leaf) syntax. Otherwise returns Nothing.
|
||||
identifierFor :: (HasField fields (Maybe SourceText), HasField fields Category, StringConv leaf Text) => Syntax leaf (Term (Syntax leaf) (Record fields)) -> Maybe Text
|
||||
identifierFor = fmap toS . extractLeafValue . unwrap <=< maybeIdentifier
|
||||
@ -207,34 +105,6 @@ termSourceTextDecorator source (ann :< _) = Just (SourceText (toText (Source.sli
|
||||
newtype Identifier = Identifier Text
|
||||
deriving (Eq, Show, ToJSON)
|
||||
|
||||
-- | Read the file and convert it to Unicode.
|
||||
readAndTranscodeFile :: FilePath -> IO Source
|
||||
readAndTranscodeFile path = do
|
||||
size <- fileSize path
|
||||
text <- case size of
|
||||
0 -> pure B.empty
|
||||
_ -> B.readFile path
|
||||
transcode text
|
||||
|
||||
-- Based on https://github.com/haskell/bytestring/pull/79/files
|
||||
-- Neccessary to be able to handle /dev/null as a file.
|
||||
fileSize :: FilePath -> IO Integer
|
||||
fileSize f = withBinaryFile f ReadMode $ \h -> do
|
||||
-- hFileSize fails if file is not regular file (like /dev/null). Catch
|
||||
-- exception and return 0 in that case.
|
||||
filesz <- catch (hFileSize h) useZeroIfNotRegularFile
|
||||
pure $ fromIntegral filesz `max` 0
|
||||
where useZeroIfNotRegularFile :: IOException -> IO Integer
|
||||
useZeroIfNotRegularFile _ = pure 0
|
||||
|
||||
-- | Transcode a file to a unicode source.
|
||||
transcode :: B.ByteString -> IO Source
|
||||
transcode text = fromText <$> do
|
||||
match <- Detect.detectCharset text
|
||||
converter <- Convert.open match Nothing
|
||||
pure $ Convert.toUnicode converter text
|
||||
|
||||
|
||||
data RoseF a b = RoseF a [b]
|
||||
deriving (Eq, Functor, Show)
|
||||
|
||||
|
51
src/Parser/Language.hs
Normal file
51
src/Parser/Language.hs
Normal file
@ -0,0 +1,51 @@
|
||||
module Parser.Language
|
||||
( parserForLanguage
|
||||
, parserForFilePath -- TODO: Remove. Langauge detection should not come from file extension.
|
||||
) where
|
||||
|
||||
import Data.Record
|
||||
import qualified Data.Text as T
|
||||
import Parser
|
||||
import Source
|
||||
import Prologue
|
||||
import Info
|
||||
import Language
|
||||
import Language.Markdown
|
||||
import Syntax
|
||||
import TreeSitter
|
||||
import Text.Parser.TreeSitter.C
|
||||
import Text.Parser.TreeSitter.Go
|
||||
import Text.Parser.TreeSitter.JavaScript
|
||||
import Text.Parser.TreeSitter.Ruby
|
||||
import Text.Parser.TreeSitter.TypeScript
|
||||
|
||||
-- TODO: Shouldn't need to depend on System.FilePath in here
|
||||
import System.FilePath
|
||||
|
||||
-- | Return a parser for a given langauge or the lineByLineParser parser.
|
||||
parserForLanguage :: Maybe Language -> Parser (Syntax Text) (Record DefaultFields)
|
||||
parserForLanguage Nothing = lineByLineParser
|
||||
parserForLanguage (Just language) = case language of
|
||||
C -> treeSitterParser C tree_sitter_c
|
||||
JavaScript -> treeSitterParser JavaScript tree_sitter_javascript
|
||||
TypeScript -> treeSitterParser TypeScript tree_sitter_typescript
|
||||
Markdown -> cmarkParser
|
||||
Ruby -> treeSitterParser Ruby tree_sitter_ruby
|
||||
Language.Go -> treeSitterParser Language.Go tree_sitter_go
|
||||
|
||||
-- | Return a parser based on the FilePath's extension (including the ".").
|
||||
-- | TODO: Remove.
|
||||
parserForFilePath :: FilePath -> Parser (Syntax Text) (Record DefaultFields)
|
||||
parserForFilePath = parserForLanguage . languageForType . toS . takeExtension
|
||||
|
||||
-- | A fallback parser that treats a file simply as rows of strings.
|
||||
lineByLineParser :: Parser (Syntax Text) (Record DefaultFields)
|
||||
lineByLineParser SourceBlob{..} = pure . cofree . root $ case foldl' annotateLeaves ([], 0) lines of
|
||||
(leaves, _) -> cofree <$> leaves
|
||||
where
|
||||
lines = actualLines source
|
||||
root children = (sourceRange :. Program :. rangeToSourceSpan source sourceRange :. Nil) :< Indexed children
|
||||
sourceRange = Source.totalRange source
|
||||
leaf charIndex line = (Range charIndex (charIndex + T.length line) :. Program :. rangeToSourceSpan source (Range charIndex (charIndex + T.length line)) :. Nil) :< Leaf line
|
||||
annotateLeaves (accum, charIndex) line =
|
||||
(accum <> [ leaf charIndex (Source.toText line) ] , charIndex + Source.length line)
|
@ -3,6 +3,8 @@ module Semantic (main, runDiff, runParse) where
|
||||
|
||||
import Arguments
|
||||
import Command
|
||||
import Command.Files
|
||||
import Command.Git
|
||||
import Command.Parse
|
||||
import Data.Functor.Both
|
||||
import Data.List.Split (splitWhen)
|
||||
|
@ -1,5 +1,6 @@
|
||||
module Command.Parse.Spec where
|
||||
|
||||
import Command.Files
|
||||
import Command.Parse
|
||||
import Control.Monad
|
||||
import Prelude
|
||||
|
@ -5,7 +5,15 @@ import qualified Data.ByteString as B
|
||||
import qualified Data.Text.ICU.Convert as Convert
|
||||
import qualified Data.Text.ICU.Detect as Detect
|
||||
import Source
|
||||
import Info
|
||||
import Data.Record
|
||||
import Syntax
|
||||
import Language
|
||||
import Parser
|
||||
import Parser.Language
|
||||
import System.FilePath
|
||||
|
||||
-- TODO: Write helper functions for parse file and diff files that don't depend on Command.
|
||||
|
||||
-- | Read a file and convert it to Unicode.
|
||||
readFileToUnicode :: FilePath -> IO Source
|
||||
@ -17,3 +25,10 @@ readFileToUnicode path = B.readFile path >>= transcode
|
||||
match <- Detect.detectCharset text
|
||||
converter <- Convert.open match Nothing
|
||||
pure $ Convert.toUnicode converter text
|
||||
|
||||
-- | Return a parser based on the FilePath's extension (including the "."). |
|
||||
-- NB: This is intentionally duplicated from Parser.Language because our tests
|
||||
-- will always need to be able to select language from file extention whereas
|
||||
-- the semantic project should eventually depend on exernal language detection.
|
||||
parserForFilePath :: FilePath -> Parser (Syntax Text) (Record DefaultFields)
|
||||
parserForFilePath = parserForLanguage . languageForType . toS . takeExtension
|
||||
|
Loading…
Reference in New Issue
Block a user