mirror of
https://github.com/github/semantic.git
synced 2024-12-26 08:25:19 +03:00
Merge remote-tracking branch 'origin/master' into syntax-a-la-carte
This commit is contained in:
commit
2558b4c3f1
@ -67,6 +67,7 @@ library
|
||||
, FDoc.Term
|
||||
, FDoc.RecursionSchemes
|
||||
, FDoc.NatExample
|
||||
, GitmonClient
|
||||
build-depends: base >= 4.8 && < 5
|
||||
, aeson
|
||||
, aeson-pretty
|
||||
@ -109,8 +110,11 @@ library
|
||||
, go
|
||||
, ruby
|
||||
, javascript
|
||||
, network
|
||||
, clock
|
||||
, yaml
|
||||
default-language: Haskell2010
|
||||
default-extensions: DeriveFunctor, DeriveFoldable, DeriveTraversable, DeriveGeneric, FlexibleContexts, FlexibleInstances, OverloadedStrings, NoImplicitPrelude, RecordWildCards, LambdaCase, StrictData
|
||||
default-extensions: DeriveFunctor, DeriveFoldable, DeriveTraversable, DeriveGeneric, FlexibleContexts, FlexibleInstances, OverloadedStrings, NoImplicitPrelude, RecordWildCards, StrictData
|
||||
ghc-options: -Wall -fno-warn-name-shadowing -O2 -j
|
||||
ghc-prof-options: -fprof-auto
|
||||
|
||||
@ -151,6 +155,7 @@ test-suite test
|
||||
, Data.RandomWalkSimilarity.Spec
|
||||
, DiffSpec
|
||||
, SummarySpec
|
||||
, GitmonClientSpec
|
||||
, InterpreterSpec
|
||||
, PatchOutputSpec
|
||||
, RangeSpec
|
||||
@ -170,6 +175,7 @@ test-suite test
|
||||
, deepseq
|
||||
, filepath
|
||||
, gitlib
|
||||
, gitlib-libgit2
|
||||
, Glob
|
||||
, hspec >= 2.4.1
|
||||
, hspec-core
|
||||
@ -177,11 +183,14 @@ test-suite test
|
||||
, HUnit
|
||||
, leancheck
|
||||
, mtl
|
||||
, network
|
||||
, protolude
|
||||
, containers
|
||||
, recursion-schemes >= 4.1
|
||||
, regex-compat
|
||||
, semantic-diff
|
||||
, text >= 1.2.1.3
|
||||
, unordered-containers
|
||||
, these
|
||||
, vector
|
||||
ghc-options: -threaded -rtsopts -with-rtsopts=-N -j -pgml=script/g++
|
||||
|
@ -20,6 +20,7 @@ import Arguments
|
||||
import Category
|
||||
import Data.RandomWalkSimilarity
|
||||
import Data.Record
|
||||
import GitmonClient
|
||||
import Info
|
||||
import Diff
|
||||
import Interpreter
|
||||
@ -102,29 +103,30 @@ blobEntriesToDiff shas = do
|
||||
a <- blobEntries (fst shas)
|
||||
b <- blobEntries (snd shas)
|
||||
pure $ (a \\ b) <> (b \\ a)
|
||||
where blobEntries sha = treeForCommitSha sha >>= treeBlobEntries
|
||||
where blobEntries sha = treeForCommitSha sha >>= treeBlobEntries'
|
||||
treeBlobEntries' tree = reportGitmon "ls-tree" $ treeBlobEntries tree
|
||||
|
||||
-- | Returns a Git.Tree for a commit sha
|
||||
treeForCommitSha :: String -> ReaderT LgRepo IO (Git.Tree LgRepo)
|
||||
treeForCommitSha sha = do
|
||||
object <- parseObjOid (toS sha)
|
||||
commit <- lookupCommit object
|
||||
lookupTree (commitTree commit)
|
||||
commit <- reportGitmon "cat-file" $ lookupCommit object
|
||||
reportGitmon "cat-file" $ lookupTree (commitTree commit)
|
||||
|
||||
-- | Returns a SourceBlob given a relative file path, and the sha to look up.
|
||||
getSourceBlob :: FilePath -> String -> ReaderT LgRepo IO SourceBlob
|
||||
getSourceBlob :: FilePath -> String -> ReaderT LgRepo IO Source.SourceBlob
|
||||
getSourceBlob path sha = do
|
||||
tree <- treeForCommitSha sha
|
||||
entry <- treeEntry tree (toS path)
|
||||
entry <- reportGitmon "ls-tree" $ treeEntry tree (toS path)
|
||||
(bytestring, oid, mode) <- case entry of
|
||||
Nothing -> pure (mempty, mempty, Nothing)
|
||||
Just (BlobEntry entryOid entryKind) -> do
|
||||
blob <- lookupBlob entryOid
|
||||
blob <- reportGitmon "cat-file" $ lookupBlob entryOid
|
||||
s <- blobToByteString blob
|
||||
let oid = renderObjOid $ blobOid blob
|
||||
pure (s, oid, Just entryKind)
|
||||
s <- liftIO $ transcode bytestring
|
||||
pure $ SourceBlob s (toS oid) path (toSourceKind <$> mode)
|
||||
pure $ Source.SourceBlob s (toS oid) path (toSourceKind <$> mode)
|
||||
where
|
||||
toSourceKind :: Git.BlobKind -> SourceKind
|
||||
toSourceKind (Git.PlainBlob mode) = Source.PlainBlob mode
|
||||
|
168
src/GitmonClient.hs
Normal file
168
src/GitmonClient.hs
Normal file
@ -0,0 +1,168 @@
|
||||
-- | We use BangPatterns to force evaluation of git operations to preserve accuracy in measuring system stats (particularly disk read bytes)
|
||||
{-# LANGUAGE RecordWildCards, DeriveGeneric, RankNTypes, BangPatterns #-}
|
||||
module GitmonClient where
|
||||
|
||||
import Control.Exception (throw)
|
||||
import Data.Aeson
|
||||
import Data.Aeson.Types
|
||||
import Data.ByteString.Lazy (toStrict)
|
||||
import Data.Char (toLower)
|
||||
import Data.Text (unpack, isInfixOf)
|
||||
import qualified Data.Yaml as Y
|
||||
import GHC.Generics
|
||||
import Git.Libgit2
|
||||
import Network.Socket hiding (recv)
|
||||
import Network.Socket.ByteString (sendAll, recv)
|
||||
import Prelude
|
||||
import Prologue hiding (toStrict, map, print, show)
|
||||
import System.Clock
|
||||
import System.Environment
|
||||
import System.Timeout
|
||||
import Text.Regex
|
||||
|
||||
newtype GitmonException = GitmonException String deriving (Show, Typeable)
|
||||
|
||||
instance Exception GitmonException
|
||||
|
||||
|
||||
data ProcIO = ProcIO { readBytes :: Integer
|
||||
, writeBytes :: Integer } deriving (Show, Generic)
|
||||
|
||||
instance FromJSON ProcIO
|
||||
|
||||
instance ToJSON ProcIO where
|
||||
toJSON ProcIO{..} = object [ "read_bytes" .= readBytes, "write_bytes" .= writeBytes ]
|
||||
|
||||
|
||||
data ProcessData = ProcessUpdateData { gitDir :: Maybe String
|
||||
, program :: String
|
||||
, realIP :: Maybe String
|
||||
, repoName :: Maybe String
|
||||
, repoID :: Maybe Int
|
||||
, userID :: Maybe Int
|
||||
, via :: String }
|
||||
| ProcessScheduleData
|
||||
| ProcessFinishData { cpu :: Integer
|
||||
, diskReadBytes :: Integer
|
||||
, diskWriteBytes :: Integer
|
||||
, resultCode :: Integer } deriving (Generic, Show)
|
||||
|
||||
instance ToJSON ProcessData where
|
||||
toJSON ProcessUpdateData{..} = object [ "git_dir" .= gitDir, "program" .= program, "repo_name" .= repoName, "real_ip" .= realIP, "repo_id" .= repoID, "user_id" .= userID, "via" .= via ]
|
||||
toJSON ProcessScheduleData = object []
|
||||
toJSON ProcessFinishData{..} = object [ "cpu" .= cpu, "disk_read_bytes" .= diskReadBytes, "disk_write_bytes" .= diskWriteBytes, "result_code" .= resultCode ]
|
||||
|
||||
|
||||
data GitmonCommand = Update
|
||||
| Finish
|
||||
| Schedule deriving (Generic, Show)
|
||||
|
||||
instance ToJSON GitmonCommand where
|
||||
toJSON = genericToJSON defaultOptions { constructorTagModifier = map toLower }
|
||||
|
||||
|
||||
data GitmonMsg = GitmonMsg { command :: GitmonCommand
|
||||
, processData :: ProcessData } deriving (Show)
|
||||
|
||||
instance ToJSON GitmonMsg where
|
||||
toJSON GitmonMsg{..} = case command of
|
||||
Update -> object ["command" .= ("update" :: String), "data" .= processData]
|
||||
Finish -> object ["command" .= ("finish" :: String), "data" .= processData]
|
||||
Schedule -> object ["command" .= ("schedule" :: String)]
|
||||
|
||||
|
||||
type ProcInfo = Either Y.ParseException (Maybe ProcIO)
|
||||
|
||||
newtype SocketFactory = SocketFactory { withSocket :: forall a. (Socket -> IO a) -> IO a }
|
||||
|
||||
reportGitmon :: String -> ReaderT LgRepo IO a -> ReaderT LgRepo IO a
|
||||
reportGitmon = reportGitmon' SocketFactory { withSocket = withGitmonSocket }
|
||||
|
||||
reportGitmon' :: SocketFactory -> String -> ReaderT LgRepo IO a -> ReaderT LgRepo IO a
|
||||
reportGitmon' SocketFactory{..} program gitCommand =
|
||||
join . liftIO . withSocket $ \sock -> do
|
||||
[gitDir, realIP, repoName, repoID, userID] <- traverse lookupEnv ["GIT_DIR", "GIT_SOCKSTAT_VAR_real_ip", "GIT_SOCKSTAT_VAR_repo_name", "GIT_SOCKSTAT_VAR_repo_id", "GIT_SOCKSTAT_VAR_user_id"]
|
||||
void . safeGitmonIO . sendAll sock $ processJSON Update (ProcessUpdateData gitDir program realIP repoName (readIntFromEnv repoID) (readIntFromEnv userID) "semantic-diff")
|
||||
void . safeGitmonIO . sendAll sock $ processJSON Schedule ProcessScheduleData
|
||||
gitmonStatus <- safeGitmonIO $ recv sock 1024
|
||||
|
||||
(startTime, beforeProcIOContents) <- collectStats
|
||||
-- | The result of the gitCommand is strictly evaluated (to next normal form). This is not equivalent to a `deepseq`. The underlying `Git.Types` do not have instances of `NFData` preventing us from using `deepseq` at this time.
|
||||
let !result = withGitmonStatus gitmonStatus gitCommand
|
||||
(afterTime, afterProcIOContents) <- collectStats
|
||||
|
||||
let (cpuTime, diskReadBytes, diskWriteBytes, resultCode) = procStats startTime afterTime beforeProcIOContents afterProcIOContents
|
||||
void . safeGitmonIO . sendAll sock $ processJSON Finish (ProcessFinishData cpuTime diskReadBytes diskWriteBytes resultCode)
|
||||
pure result
|
||||
|
||||
where
|
||||
withGitmonStatus :: Maybe ByteString -> ReaderT LgRepo IO a -> ReaderT LgRepo IO a
|
||||
withGitmonStatus maybeGitmonStatus gitCommand = case maybeGitmonStatus of
|
||||
Just gitmonStatus | "fail" `isInfixOf` decodeUtf8 gitmonStatus -> throwGitmonException gitmonStatus
|
||||
_ -> gitCommand
|
||||
|
||||
throwGitmonException :: ByteString -> e
|
||||
throwGitmonException command = throw . GitmonException . unpack $ "Received: '" <> decodeUtf8 command <> "' from Gitmon"
|
||||
|
||||
collectStats :: IO (TimeSpec, ProcInfo)
|
||||
collectStats = do
|
||||
time <- getTime clock
|
||||
procIOContents <- Y.decodeFileEither procFileAddr :: IO ProcInfo
|
||||
pure (time, procIOContents)
|
||||
|
||||
procStats :: TimeSpec -> TimeSpec -> ProcInfo -> ProcInfo -> ( Integer, Integer, Integer, Integer )
|
||||
procStats beforeTime afterTime beforeProcIOContents afterProcIOContents = ( cpuTime, diskReadBytes, diskWriteBytes, resultCode )
|
||||
where
|
||||
-- | toNanoSecs converts TimeSpec to Integer, and we further convert this value to milliseconds (expected by Gitmon).
|
||||
cpuTime = div (1 * 1000 * 1000) . toNanoSecs $ afterTime - beforeTime
|
||||
beforeDiskReadBytes = either (const 0) (maybe 0 readBytes) beforeProcIOContents
|
||||
afterDiskReadBytes = either (const 0) (maybe 0 readBytes) afterProcIOContents
|
||||
beforeDiskWriteBytes = either (const 0) (maybe 0 writeBytes) beforeProcIOContents
|
||||
afterDiskWriteBytes = either (const 0) (maybe 0 writeBytes) afterProcIOContents
|
||||
diskReadBytes = afterDiskReadBytes - beforeDiskReadBytes
|
||||
diskWriteBytes = afterDiskWriteBytes - beforeDiskWriteBytes
|
||||
resultCode = 0
|
||||
|
||||
readIntFromEnv :: Maybe String -> Maybe Int
|
||||
readIntFromEnv Nothing = Nothing
|
||||
readIntFromEnv (Just s) = readInt $ matchRegex regex s
|
||||
where
|
||||
-- | Expected format for userID and repoID is: "uint:123",
|
||||
-- where "uint:" indicates an unsigned integer followed by an integer value.
|
||||
regex :: Regex
|
||||
regex = mkRegexWithOpts "^uint:([0-9]+)$" False True
|
||||
|
||||
readInt :: Maybe [String] -> Maybe Int
|
||||
readInt (Just [s]) = Just (read s :: Int)
|
||||
readInt _ = Nothing
|
||||
|
||||
withGitmonSocket :: (Socket -> IO c) -> IO c
|
||||
withGitmonSocket = bracket connectSocket close
|
||||
where
|
||||
connectSocket = do
|
||||
s <- socket AF_UNIX Stream defaultProtocol
|
||||
void . safeGitmonIO $ connect s (SockAddrUnix gitmonSocketAddr)
|
||||
pure s
|
||||
|
||||
-- | Timeout in nanoseconds to wait before giving up on Gitmon response to schedule.
|
||||
gitmonTimeout :: Int
|
||||
gitmonTimeout = 1 * 1000 * 1000
|
||||
|
||||
gitmonSocketAddr :: String
|
||||
gitmonSocketAddr = "/tmp/gitstats.sock"
|
||||
|
||||
safeGitmonIO :: MonadIO m => IO a -> m (Maybe a)
|
||||
safeGitmonIO command = liftIO $ timeout gitmonTimeout command `catch` logError
|
||||
|
||||
logError :: IOException -> IO (Maybe a)
|
||||
logError _ = pure Nothing
|
||||
|
||||
procFileAddr :: String
|
||||
procFileAddr = "/proc/self/io"
|
||||
|
||||
clock :: Clock
|
||||
clock = Realtime
|
||||
|
||||
processJSON :: GitmonCommand -> ProcessData -> ByteString
|
||||
processJSON command processData = toStrict . encode $ GitmonMsg command processData
|
||||
|
@ -40,7 +40,7 @@ runSteps algorithm = case runStep algorithm of
|
||||
runStep :: (Eq leaf, HasField fields Category, HasField fields (Maybe FeatureVector))
|
||||
=> Algorithm (SyntaxTerm leaf fields) (SyntaxDiff leaf fields) result
|
||||
-> Either result (Algorithm (SyntaxTerm leaf fields) (SyntaxDiff leaf fields) result)
|
||||
runStep = \case
|
||||
runStep step = case step of
|
||||
Return a -> Left a
|
||||
algorithm `Then` cont -> Right $ decompose algorithm >>= cont
|
||||
|
||||
@ -49,7 +49,7 @@ runStep = \case
|
||||
decompose :: (Eq leaf, HasField fields Category, HasField fields (Maybe FeatureVector))
|
||||
=> AlgorithmF (SyntaxTerm leaf fields) (SyntaxDiff leaf fields) result -- ^ The step in an algorithm to decompose into its next steps.
|
||||
-> Algorithm (SyntaxTerm leaf fields) (SyntaxDiff leaf fields) result -- ^ The sequence of next steps to undertake to continue the algorithm.
|
||||
decompose = \case
|
||||
decompose step = case step of
|
||||
Diff t1 t2 -> algorithmWithTerms t1 t2
|
||||
Linear t1 t2 -> case galignWith diffThese (unwrap t1) (unwrap t2) of
|
||||
Just result -> wrap . (both (extract t1) (extract t2) :<) <$> sequenceA result
|
||||
|
@ -76,7 +76,7 @@ termAssignment source category children = case (category, children) of
|
||||
_ -> Nothing
|
||||
|
||||
categoryForGoName :: Text -> Category
|
||||
categoryForGoName = \case
|
||||
categoryForGoName name = case name of
|
||||
"identifier" -> Identifier
|
||||
"int_literal" -> NumberLiteral
|
||||
"float_literal" -> FloatLiteral
|
||||
|
@ -96,7 +96,7 @@ termAssignment _ category children
|
||||
withRecord record syntax = cofree (record :< syntax)
|
||||
|
||||
categoryForRubyName :: Text -> Category
|
||||
categoryForRubyName = \case
|
||||
categoryForRubyName name = case name of
|
||||
"argument_list_with_parens" -> Args
|
||||
"argument_list" -> Args
|
||||
"argument_pair" -> ArgumentPair
|
||||
|
@ -81,7 +81,7 @@ maybeSnd :: These a b -> Maybe b
|
||||
maybeSnd = these (const Nothing) Just ((Just .) . flip const)
|
||||
|
||||
patchType :: Patch a -> Text
|
||||
patchType = \case
|
||||
patchType patch = case patch of
|
||||
Replace{} -> "modified"
|
||||
Insert{} -> "added"
|
||||
Delete{} -> "removed"
|
||||
|
@ -33,7 +33,7 @@ data Annotatable a = Annotatable a | Unannotatable a
|
||||
|
||||
annotatable :: SyntaxTerm leaf fields -> Annotatable (SyntaxTerm leaf fields)
|
||||
annotatable term = isAnnotatable (unwrap term) term
|
||||
where isAnnotatable = \case
|
||||
where isAnnotatable syntax = case syntax of
|
||||
S.Class{} -> Annotatable
|
||||
S.Method{} -> Annotatable
|
||||
S.Function{} -> Annotatable
|
||||
@ -44,7 +44,7 @@ data Identifiable a = Identifiable a | Unidentifiable a
|
||||
|
||||
identifiable :: SyntaxTerm leaf fields -> Identifiable (SyntaxTerm leaf fields)
|
||||
identifiable term = isIdentifiable (unwrap term) term
|
||||
where isIdentifiable = \case
|
||||
where isIdentifiable syntax = case syntax of
|
||||
S.FunctionCall{} -> Identifiable
|
||||
S.MethodCall{} -> Identifiable
|
||||
S.Function{} -> Identifiable
|
||||
@ -138,15 +138,15 @@ diffToDiffSummaries sources = para $ \diff ->
|
||||
|
||||
-- Flattens a patch of diff infos into a list of docs, one for every 'LeafInfo' or `ErrorInfo` it contains.
|
||||
jsonDocSummaries :: Patch DiffInfo -> [JSONSummary Doc SourceSpans]
|
||||
jsonDocSummaries = \case
|
||||
p@(Replace i1 i2) -> zipWith (\a b ->
|
||||
jsonDocSummaries patch = case patch of
|
||||
Replace i1 i2 -> zipWith (\a b ->
|
||||
JSONSummary
|
||||
{
|
||||
info = info (prefixWithPatch p This a) <+> "with" <+> info b
|
||||
info = info (prefixWithPatch patch This a) <+> "with" <+> info b
|
||||
, span = SourceSpans $ These (span a) (span b)
|
||||
}) (toLeafInfos i1) (toLeafInfos i2)
|
||||
p@(Insert info) -> prefixWithPatch p That <$> toLeafInfos info
|
||||
p@(Delete info) -> prefixWithPatch p This <$> toLeafInfos info
|
||||
Insert info -> prefixWithPatch patch That <$> toLeafInfos info
|
||||
Delete info -> prefixWithPatch patch This <$> toLeafInfos info
|
||||
|
||||
-- Prefixes a given doc with the type of patch it represents.
|
||||
prefixWithPatch :: Patch DiffInfo -> (SourceSpan -> These SourceSpan SourceSpan) -> JSONSummary Doc SourceSpan -> JSONSummary Doc SourceSpans
|
||||
@ -157,7 +157,7 @@ prefixWithPatch patch constructor = prefixWithThe (patchToPrefix patch)
|
||||
info = prefix <+> info jsonSummary
|
||||
, span = SourceSpans $ constructor (span jsonSummary)
|
||||
}
|
||||
patchToPrefix = \case
|
||||
patchToPrefix patch = case patch of
|
||||
(Replace _ _) -> "Replaced"
|
||||
(Insert _) -> "Added"
|
||||
(Delete _) -> "Deleted"
|
||||
@ -380,7 +380,7 @@ instance HasCategory Text where
|
||||
toCategoryName = identity
|
||||
|
||||
instance HasCategory Category where
|
||||
toCategoryName = \case
|
||||
toCategoryName category = case category of
|
||||
ArrayLiteral -> "array"
|
||||
BooleanOperator -> "boolean operator"
|
||||
MathOperator -> "math operator"
|
||||
|
@ -109,7 +109,7 @@ toTOCSummaries patch = case afterOrBefore patch of
|
||||
_ -> NotSummarizable
|
||||
|
||||
flattenPatch :: Patch DiffInfo -> [Patch DiffInfo]
|
||||
flattenPatch = \case
|
||||
flattenPatch patch = case patch of
|
||||
Replace i1 i2 -> zipWith Replace (toLeafInfos' i1) (toLeafInfos' i2)
|
||||
Insert info -> Insert <$> toLeafInfos' info
|
||||
Delete info -> Delete <$> toLeafInfos' info
|
||||
@ -133,7 +133,7 @@ mapToInSummarizable sources diff children = case (beforeTerm diff, afterTerm dif
|
||||
|
||||
summarizable :: ComonadCofree (Syntax t) w => w a -> SummarizableTerm (w a)
|
||||
summarizable term = go (unwrap term) term
|
||||
where go = \case
|
||||
where go syntax = case syntax of
|
||||
S.Method{} -> SummarizableTerm
|
||||
S.Function{} -> SummarizableTerm
|
||||
_ -> NotSummarizableTerm
|
||||
@ -143,7 +143,7 @@ toJSONSummaries TOCSummary{..} = case afterOrBefore summaryPatch of
|
||||
Just diffInfo -> toJSONSummaries' diffInfo
|
||||
Nothing -> panic "No diff"
|
||||
where
|
||||
toJSONSummaries' = \case
|
||||
toJSONSummaries' diffInfo = case diffInfo of
|
||||
ErrorInfo{..} -> pure $ ErrorSummary termName infoSpan
|
||||
BranchInfo{..} -> branches >>= toJSONSummaries'
|
||||
LeafInfo{..} -> case parentInfo of
|
||||
@ -183,6 +183,6 @@ toTermName parentOffset parentSource term = case unwrap term of
|
||||
|
||||
-- The user-facing category name
|
||||
toCategoryName :: Category -> Text
|
||||
toCategoryName = \case
|
||||
toCategoryName category = case category of
|
||||
C.SingletonMethod -> "Method"
|
||||
c -> show c
|
||||
|
@ -18,6 +18,7 @@ import Source
|
||||
import qualified Syntax
|
||||
import Foreign
|
||||
import Foreign.C.String (peekCString)
|
||||
import Foreign.Marshal.Array (allocaArray)
|
||||
import Data.Text.Foreign (withCStringLen)
|
||||
import qualified Syntax as S
|
||||
import Term
|
||||
@ -41,39 +42,40 @@ treeSitterParser language grammar blob = do
|
||||
|
||||
-- | Return a parser for a tree sitter language & document.
|
||||
documentToTerm :: Language -> Ptr Document -> Parser (Syntax.Syntax Text) (Record '[Range, Category, SourceSpan])
|
||||
documentToTerm language document SourceBlob{..} = alloca $ \ root -> do
|
||||
ts_document_root_node_p document root
|
||||
toTerm root (totalRange source) source
|
||||
where toTerm node range source = do
|
||||
name <- ts_node_p_name node document
|
||||
name <- peekCString name
|
||||
count <- ts_node_p_named_child_count node
|
||||
documentToTerm language document SourceBlob{..} = do
|
||||
root <- alloca (\ rootPtr -> do
|
||||
ts_document_root_node_p document rootPtr
|
||||
peek rootPtr)
|
||||
toTerm root source
|
||||
where toTerm :: Node -> Source -> IO (Term (Syntax.Syntax Text) (Record '[Range, Category, SourceSpan]))
|
||||
toTerm node source = do
|
||||
name <- peekCString (nodeType node)
|
||||
|
||||
let getChild getter parentNode n childNode = do
|
||||
_ <- getter parentNode n childNode
|
||||
let childRange = nodeRange childNode
|
||||
toTerm childNode childRange (slice (offsetRange childRange (negate (start range))) source)
|
||||
children <- getChildren (fromIntegral (nodeNamedChildCount node)) copyNamed
|
||||
let allChildren = getChildren (fromIntegral (nodeChildCount node)) copyAll
|
||||
|
||||
children <- filter isNonEmpty <$> traverse (alloca . getChild ts_node_p_named_child node) (take (fromIntegral count) [0..])
|
||||
assignTerm language source (range :. categoryForLanguageProductionName language (toS name) :. nodeSpan node :. Nil) children allChildren
|
||||
where getChildren count copy = do
|
||||
nodes <- allocaArray count $ \ childNodesPtr -> do
|
||||
_ <- with (nodeTSNode node) (\ nodePtr -> copy nodePtr childNodesPtr (fromIntegral count))
|
||||
peekArray count childNodesPtr
|
||||
children <- traverse childNodeToTerm nodes
|
||||
return $! filter isNonEmpty children
|
||||
childNodeToTerm childNode = toTerm childNode (slice (offsetRange (nodeRange childNode) (negate (start range))) source)
|
||||
range = nodeRange node
|
||||
copyNamed = ts_node_copy_named_child_nodes document
|
||||
copyAll = ts_node_copy_child_nodes document
|
||||
|
||||
let startPos = SourcePos (1 + (fromIntegral $! ts_node_p_start_point_row node)) (1 + (fromIntegral $! ts_node_p_start_point_column node))
|
||||
let endPos = SourcePos (1 + (fromIntegral $! ts_node_p_end_point_row node)) (1 + (fromIntegral $! ts_node_p_end_point_column node))
|
||||
let sourceSpan = SourceSpan { spanStart = startPos , spanEnd = endPos }
|
||||
|
||||
allChildrenCount <- ts_node_p_child_count node
|
||||
let allChildren = filter isNonEmpty <$> traverse (alloca . getChild ts_node_p_child node) (take (fromIntegral allChildrenCount) [0..])
|
||||
|
||||
-- Note: The strict application here is semantically important.
|
||||
-- Without it, we may not evaluate the value until after we’ve exited
|
||||
-- the scope that `node` was allocated within, meaning `alloca` will
|
||||
-- free it & other stack data may overwrite it.
|
||||
range `seq` sourceSpan `seq` assignTerm language source (range :. categoryForLanguageProductionName language (toS name) :. sourceSpan :. Nil) children allChildren
|
||||
|
||||
isNonEmpty :: HasField fields Category => SyntaxTerm Text fields -> Bool
|
||||
isNonEmpty = (/= Empty) . category . extract
|
||||
|
||||
nodeRange :: Ptr Node -> Range
|
||||
nodeRange node = Range { start = fromIntegral (ts_node_p_start_byte node), end = fromIntegral (ts_node_p_end_byte node) }
|
||||
nodeRange :: Node -> Range
|
||||
nodeRange Node{..} = Range (fromIntegral nodeStartByte) (fromIntegral nodeEndByte)
|
||||
|
||||
nodeSpan :: Node -> SourceSpan
|
||||
nodeSpan Node{..} = nodeStartPoint `seq` nodeEndPoint `seq` SourceSpan (pointPos nodeStartPoint) (pointPos nodeEndPoint)
|
||||
where pointPos TSPoint{..} = pointRow `seq` pointColumn `seq` SourcePos (1 + fromIntegral pointRow) (1 + fromIntegral pointColumn)
|
||||
|
||||
assignTerm :: Language -> Source -> Record '[Range, Category, SourceSpan] -> [ SyntaxTerm Text '[ Range, Category, SourceSpan ] ] -> IO [ SyntaxTerm Text '[ Range, Category, SourceSpan ] ] -> IO (SyntaxTerm Text '[ Range, Category, SourceSpan ])
|
||||
assignTerm language source annotation children allChildren =
|
||||
@ -81,7 +83,7 @@ assignTerm language source annotation children allChildren =
|
||||
Just a -> pure a
|
||||
_ -> defaultTermAssignment source (category annotation) children allChildren
|
||||
where assignTermByLanguage :: Language -> Source -> Category -> [ SyntaxTerm Text '[ Range, Category, SourceSpan ] ] -> Maybe (S.Syntax Text (SyntaxTerm Text '[ Range, Category, SourceSpan ]))
|
||||
assignTermByLanguage = \case
|
||||
assignTermByLanguage language = case language of
|
||||
JavaScript -> JS.termAssignment
|
||||
C -> C.termAssignment
|
||||
Language.Go -> Go.termAssignment
|
||||
@ -129,12 +131,13 @@ defaultTermAssignment source category children allChildren
|
||||
|
||||
|
||||
categoryForLanguageProductionName :: Language -> Text -> Category
|
||||
categoryForLanguageProductionName = withDefaults . \case
|
||||
JavaScript -> JS.categoryForJavaScriptProductionName
|
||||
C -> C.categoryForCProductionName
|
||||
Ruby -> Ruby.categoryForRubyName
|
||||
Language.Go -> Go.categoryForGoName
|
||||
_ -> Other
|
||||
where withDefaults productionMap = \case
|
||||
categoryForLanguageProductionName = withDefaults . byLanguage
|
||||
where withDefaults productionMap name = case name of
|
||||
"ERROR" -> ParseError
|
||||
s -> productionMap s
|
||||
byLanguage language = case language of
|
||||
JavaScript -> JS.categoryForJavaScriptProductionName
|
||||
C -> C.categoryForCProductionName
|
||||
Ruby -> Ruby.categoryForRubyName
|
||||
Language.Go -> Go.categoryForGoName
|
||||
_ -> Other
|
||||
|
193
test/GitmonClientSpec.hs
Normal file
193
test/GitmonClientSpec.hs
Normal file
@ -0,0 +1,193 @@
|
||||
module GitmonClientSpec where
|
||||
|
||||
import Control.Exception
|
||||
import Data.Aeson
|
||||
import Data.Aeson.Types
|
||||
import Data.ByteString.Char8 (ByteString, pack, unpack)
|
||||
import Data.Foldable
|
||||
import Data.HashMap.Lazy (empty)
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Data.Text hiding (empty)
|
||||
import Git.Libgit2
|
||||
import Git.Repository
|
||||
import Git.Types hiding (Object)
|
||||
import GitmonClient
|
||||
import Network.Socket hiding (recv)
|
||||
import Network.Socket.ByteString
|
||||
import Prelude hiding (lookup)
|
||||
import Prologue (liftIO, runReaderT)
|
||||
import System.Environment (setEnv)
|
||||
import Test.Hspec hiding (shouldBe, shouldSatisfy, shouldThrow, anyErrorCall)
|
||||
import Test.Hspec.Expectations.Pretty
|
||||
import Text.Regex
|
||||
|
||||
spec :: Spec
|
||||
spec =
|
||||
describe "gitmon" $ do
|
||||
let wd = "test/fixtures/git/examples/all-languages.git"
|
||||
realIP' = "127.0.0.1"
|
||||
repoName' = "examples/all-languages"
|
||||
|
||||
it "receives commands in order" . withSocketPair $ \(_, server, socketFactory) ->
|
||||
withRepository lgFactory wd $ do
|
||||
liftIO $ sendAll server "continue"
|
||||
object <- parseObjOid (Data.Text.pack "dfac8fd681b0749af137aebf3203e77a06fbafc2")
|
||||
commit <- reportGitmon' socketFactory "cat-file" $ lookupCommit object
|
||||
info <- liftIO $ recv server 1024
|
||||
|
||||
let [updateData, scheduleData, finishData] = infoToCommands info
|
||||
|
||||
liftIO $ do
|
||||
shouldBe (commitOid commit) object
|
||||
shouldBe updateData (Just "update")
|
||||
shouldBe scheduleData (Just "schedule")
|
||||
shouldBe finishData (Just "finish")
|
||||
|
||||
it "receives update command with correct data" . withSocketPair $ \(_, server, socketFactory) ->
|
||||
withRepository lgFactory wd $ do
|
||||
liftIO $ do
|
||||
traverse_ (uncurry setEnv) [("GIT_DIR", wd), ("GIT_SOCKSTAT_VAR_real_ip", realIP'), ("GIT_SOCKSTAT_VAR_repo_name", repoName'), ("GIT_SOCKSTAT_VAR_repo_id", "uint:10"), ("GIT_SOCKSTAT_VAR_user_id", "uint:20")]
|
||||
sendAll server "continue"
|
||||
|
||||
object <- parseObjOid (Data.Text.pack "dfac8fd681b0749af137aebf3203e77a06fbafc2")
|
||||
commit <- reportGitmon' socketFactory "cat-file" $ lookupCommit object
|
||||
info <- liftIO $ recv server 1024
|
||||
|
||||
let [updateData, _, finishData] = infoToData info
|
||||
|
||||
liftIO $ do
|
||||
shouldBe (commitOid commit) object
|
||||
shouldBe (either Just gitDir updateData) (Just wd)
|
||||
shouldBe (either id program updateData) "cat-file"
|
||||
shouldBe (either Just realIP updateData) (Just "127.0.0.1")
|
||||
shouldBe (either Just repoName updateData) (Just "examples/all-languages")
|
||||
shouldBe (either (const $ Just 1) repoID updateData) (Just 10)
|
||||
shouldBe (either (const $ Just 1) userID updateData) (Just 20)
|
||||
shouldBe (either id via updateData) "semantic-diff"
|
||||
|
||||
shouldSatisfy (either (const (-1)) cpu finishData) (>= 0)
|
||||
shouldSatisfy (either (const (-1)) diskReadBytes finishData) (>= 0)
|
||||
shouldSatisfy (either (const (-1)) diskWriteBytes finishData) (>= 0)
|
||||
shouldSatisfy (either (const (-1)) resultCode finishData) (>= 0)
|
||||
|
||||
it "reads Nothing for user_id and repo_id when valid prefix but invalid value" . withSocketPair $ \(_, server, socketFactory) ->
|
||||
withRepository lgFactory wd $ do
|
||||
liftIO $ do
|
||||
traverse_ (uncurry setEnv) [("GIT_DIR", wd), ("GIT_SOCKSTAT_VAR_real_ip", realIP'), ("GIT_SOCKSTAT_VAR_repo_name", repoName'), ("GIT_SOCKSTAT_VAR_repo_id", "uint:not_valid"), ("GIT_SOCKSTAT_VAR_user_id", "uint:not_valid")]
|
||||
sendAll server "continue"
|
||||
|
||||
object <- parseObjOid (Data.Text.pack "dfac8fd681b0749af137aebf3203e77a06fbafc2")
|
||||
_ <- reportGitmon' socketFactory "cat-file" $ lookupCommit object
|
||||
info <- liftIO $ recv server 1024
|
||||
|
||||
let [updateData, _, _] = infoToData info
|
||||
|
||||
liftIO $ do
|
||||
shouldBe (either (const $ Just 1) repoID updateData) Nothing
|
||||
shouldBe (either (const $ Just 1) userID updateData) Nothing
|
||||
|
||||
it "reads Nothing for user_id and repo_id when valid prefix but value is preceeded by invalid chars" . withSocketPair $ \(_, server, socketFactory) ->
|
||||
withRepository lgFactory wd $ do
|
||||
liftIO $ do
|
||||
traverse_ (uncurry setEnv) [("GIT_DIR", wd), ("GIT_SOCKSTAT_VAR_real_ip", realIP'), ("GIT_SOCKSTAT_VAR_repo_name", repoName'), ("GIT_SOCKSTAT_VAR_repo_id", "uint:abc100"), ("GIT_SOCKSTAT_VAR_user_id", "uint:abc100")]
|
||||
sendAll server "continue"
|
||||
|
||||
object <- parseObjOid (Data.Text.pack "dfac8fd681b0749af137aebf3203e77a06fbafc2")
|
||||
_ <- reportGitmon' socketFactory "cat-file" $ lookupCommit object
|
||||
info <- liftIO $ recv server 1024
|
||||
|
||||
let [updateData, _, _] = infoToData info
|
||||
|
||||
liftIO $ do
|
||||
shouldBe (either (const $ Just 1) repoID updateData) Nothing
|
||||
shouldBe (either (const $ Just 1) userID updateData) Nothing
|
||||
|
||||
it "reads Nothing for user_id and repo_id when valid prefix but value is proceeded by invalid chars" . withSocketPair $ \(_, server, socketFactory) ->
|
||||
withRepository lgFactory wd $ do
|
||||
liftIO $ do
|
||||
traverse_ (uncurry setEnv) [("GIT_DIR", wd), ("GIT_SOCKSTAT_VAR_real_ip", realIP'), ("GIT_SOCKSTAT_VAR_repo_name", repoName'), ("GIT_SOCKSTAT_VAR_repo_id", "uint:100abc"), ("GIT_SOCKSTAT_VAR_user_id", "uint:100abc")]
|
||||
sendAll server "continue"
|
||||
|
||||
object <- parseObjOid (Data.Text.pack "dfac8fd681b0749af137aebf3203e77a06fbafc2")
|
||||
_ <- reportGitmon' socketFactory "cat-file" $ lookupCommit object
|
||||
info <- liftIO $ recv server 1024
|
||||
|
||||
let [updateData, _, _] = infoToData info
|
||||
|
||||
liftIO $ do
|
||||
shouldBe (either (const $ Just 1) repoID updateData) Nothing
|
||||
shouldBe (either (const $ Just 1) userID updateData) Nothing
|
||||
|
||||
it "reads Nothing for user_id and repo_id when missing prefix but value is valid" . withSocketPair $ \(_, server, socketFactory) ->
|
||||
withRepository lgFactory wd $ do
|
||||
liftIO $ do
|
||||
traverse_ (uncurry setEnv) [("GIT_DIR", wd), ("GIT_SOCKSTAT_VAR_real_ip", realIP'), ("GIT_SOCKSTAT_VAR_repo_name", repoName'), ("GIT_SOCKSTAT_VAR_repo_id", "100"), ("GIT_SOCKSTAT_VAR_user_id", "100")]
|
||||
sendAll server "continue"
|
||||
|
||||
object <- parseObjOid (Data.Text.pack "dfac8fd681b0749af137aebf3203e77a06fbafc2")
|
||||
_ <- reportGitmon' socketFactory "cat-file" $ lookupCommit object
|
||||
info <- liftIO $ recv server 1024
|
||||
|
||||
let [updateData, _, _] = infoToData info
|
||||
|
||||
liftIO $ do
|
||||
shouldBe (either (const $ Just 1) repoID updateData) Nothing
|
||||
shouldBe (either (const $ Just 1) userID updateData) Nothing
|
||||
|
||||
it "returns the correct git result if the socket is unavailable" . withSocketPair $ \(client, server, socketFactory) ->
|
||||
withRepository lgFactory wd $ do
|
||||
liftIO $ close client
|
||||
|
||||
object <- parseObjOid (Data.Text.pack "dfac8fd681b0749af137aebf3203e77a06fbafc2")
|
||||
commit <- reportGitmon' socketFactory "cat-file" $ lookupCommit object
|
||||
info <- liftIO $ recv server 1024
|
||||
|
||||
liftIO $ shouldBe (commitOid commit) object
|
||||
liftIO $ shouldBe "" info
|
||||
|
||||
it "throws if schedule response is fail" . withSocketPair $ \(_, server, socketFactory) ->
|
||||
withRepository lgFactory wd $ do
|
||||
repo <- getRepository
|
||||
liftIO $ sendAll server "fail too busy"
|
||||
object <- parseObjOid (Data.Text.pack "dfac8fd681b0749af137aebf3203e77a06fbafc2")
|
||||
|
||||
liftIO $ shouldThrow (runReaderT (reportGitmon' socketFactory "cat-file" (lookupCommit object)) repo) gitmonException
|
||||
|
||||
gitmonException :: GitmonException -> Bool
|
||||
gitmonException = const True
|
||||
|
||||
withSocketPair :: ((Socket, Socket, SocketFactory) -> IO c) -> IO c
|
||||
withSocketPair = bracket create release
|
||||
where
|
||||
create = do
|
||||
(client, server) <- socketPair AF_UNIX Stream defaultProtocol
|
||||
pure (client, server, SocketFactory (\f -> f client))
|
||||
release (client, server, _) = do
|
||||
close client
|
||||
close server
|
||||
|
||||
infoToCommands :: ByteString -> [Maybe Text]
|
||||
infoToCommands input = command' . toObject <$> extract regex input
|
||||
where
|
||||
command' :: Object -> Maybe Text
|
||||
command' = parseMaybe (.: "command")
|
||||
|
||||
infoToData :: ByteString -> [Either String ProcessData]
|
||||
infoToData input = data' . toObject <$> extract regex input
|
||||
where
|
||||
data' = parseEither parser
|
||||
parser o = do
|
||||
dataO <- o .: "data"
|
||||
asum [ ProcessUpdateData <$> (dataO .: "git_dir") <*> (dataO .: "program") <*> (dataO .:? "real_ip") <*> (dataO .:? "repo_name") <*> (dataO .:? "repo_id") <*> (dataO .:? "user_id") <*> (dataO .: "via")
|
||||
, ProcessFinishData <$> (dataO .: "cpu") <*> (dataO .: "disk_read_bytes") <*> (dataO .: "disk_write_bytes") <*> (dataO .: "result_code")
|
||||
, pure ProcessScheduleData
|
||||
]
|
||||
|
||||
toObject :: ByteString -> Object
|
||||
toObject input = fromMaybe empty (decodeStrict input)
|
||||
|
||||
regex :: Regex
|
||||
regex = mkRegexWithOpts "(\\{.*\"update\".*\"\\}\\})(\\{.*\"schedule\"\\})(\\{.*\"finish\".*\\}\\})" False True
|
||||
|
||||
extract :: Regex -> ByteString -> [ByteString]
|
||||
extract regex input = Data.ByteString.Char8.pack <$> fromMaybe [""] (matchRegex regex (Data.ByteString.Char8.unpack input))
|
36
test/Spec.hs
36
test/Spec.hs
@ -6,6 +6,7 @@ import qualified Data.Mergeable.Spec
|
||||
import qualified Data.RandomWalkSimilarity.Spec
|
||||
import qualified DiffSpec
|
||||
import qualified SummarySpec
|
||||
import qualified GitmonClientSpec
|
||||
import qualified InterpreterSpec
|
||||
import qualified PatchOutputSpec
|
||||
import qualified RangeSpec
|
||||
@ -19,19 +20,22 @@ import qualified IntegrationSpec
|
||||
import Test.Hspec
|
||||
|
||||
main :: IO ()
|
||||
main = hspec . parallel $ do
|
||||
describe "Alignment" AlignmentSpec.spec
|
||||
describe "Data.Mergeable" Data.Mergeable.Spec.spec
|
||||
describe "Data.RandomWalkSimilarity" Data.RandomWalkSimilarity.Spec.spec
|
||||
describe "Diff" DiffSpec.spec
|
||||
describe "Summary" SummarySpec.spec
|
||||
describe "Interpreter" InterpreterSpec.spec
|
||||
describe "PatchOutput" PatchOutputSpec.spec
|
||||
describe "Range" RangeSpec.spec
|
||||
describe "SES.Myers" SES.Myers.Spec.spec
|
||||
describe "Source" SourceSpec.spec
|
||||
describe "Term" TermSpec.spec
|
||||
describe "TOC" TOCSpec.spec
|
||||
describe "DiffCommand" DiffCommandSpec.spec
|
||||
describe "ParseCommand" ParseCommandSpec.spec
|
||||
describe "Integration" IntegrationSpec.spec
|
||||
main = do
|
||||
hspec . parallel $ do
|
||||
describe "Alignment" AlignmentSpec.spec
|
||||
describe "Data.Mergeable" Data.Mergeable.Spec.spec
|
||||
describe "Data.RandomWalkSimilarity" Data.RandomWalkSimilarity.Spec.spec
|
||||
describe "Diff" DiffSpec.spec
|
||||
describe "Summary" SummarySpec.spec
|
||||
describe "Interpreter" InterpreterSpec.spec
|
||||
describe "PatchOutput" PatchOutputSpec.spec
|
||||
describe "Range" RangeSpec.spec
|
||||
describe "SES.Myers" SES.Myers.Spec.spec
|
||||
describe "Source" SourceSpec.spec
|
||||
describe "Term" TermSpec.spec
|
||||
describe "TOC" TOCSpec.spec
|
||||
describe "DiffCommand" DiffCommandSpec.spec
|
||||
describe "ParseCommand" ParseCommandSpec.spec
|
||||
describe "Integration" IntegrationSpec.spec
|
||||
|
||||
hspec $ describe "GitmonClient" GitmonClientSpec.spec
|
2
vendor/haskell-tree-sitter
vendored
2
vendor/haskell-tree-sitter
vendored
@ -1 +1 @@
|
||||
Subproject commit c70bc6dafcbdc572082c46345e6425508ceaf43f
|
||||
Subproject commit 8dccb62e0f2859baea83016f62f99b78c70a9e87
|
Loading…
Reference in New Issue
Block a user