1
1
mirror of https://github.com/github/semantic.git synced 2025-01-09 00:56:32 +03:00
semantic/src/GitmonClient.hs

184 lines
7.9 KiB
Haskell
Raw Normal View History

2017-03-10 00:52:30 +03:00
-- | We use BangPatterns to force evaluation of git operations to preserve accuracy in measuring system stats (particularly disk read bytes)
{-# LANGUAGE RecordWildCards, BangPatterns, DeriveGeneric, RankNTypes #-}
2017-03-02 21:27:16 +03:00
{-# OPTIONS_GHC -fno-warn-unused-do-bind #-}
2017-02-15 04:42:35 +03:00
module GitmonClient where
2017-03-08 21:30:32 +03:00
import Control.Exception (throw)
2017-02-15 04:42:35 +03:00
import Data.Aeson
import Data.Aeson.Types
2017-02-24 01:03:26 +03:00
import Data.ByteString.Lazy (toStrict)
2017-03-03 00:16:38 +03:00
import Data.Text (pack, unpack, toLower, isInfixOf)
2017-03-01 02:27:33 +03:00
import qualified Data.Yaml as Y
2017-02-24 01:03:26 +03:00
import GHC.Generics
2017-02-22 22:15:58 +03:00
import Git.Libgit2
2017-03-03 00:16:38 +03:00
import Network.Socket hiding (recv)
import Network.Socket.ByteString (sendAll, recv)
2017-02-24 01:03:26 +03:00
import Prelude
2017-03-11 02:30:43 +03:00
import Prologue hiding (toStrict)
import System.Clock
2017-03-01 02:27:33 +03:00
import System.Directory (getCurrentDirectory)
import System.Environment
import System.Timeout
import Text.Regex
2017-02-15 04:42:35 +03:00
2017-03-08 21:30:32 +03:00
newtype GitmonException = GitmonException String deriving (Show, Typeable)
instance Exception GitmonException
2017-03-03 00:35:51 +03:00
data ProcIO = ProcIO { read_bytes :: Integer
, write_bytes :: Integer } deriving (Show, Generic)
instance FromJSON ProcIO
2017-03-03 00:35:51 +03:00
data ProcessData = ProcessUpdateData { gitDir :: String
, program :: String
, realIP :: Maybe String
, repoName :: Maybe String
, repoID :: Maybe Int
, userID :: Maybe Int
2017-03-03 00:35:51 +03:00
, via :: String }
| ProcessScheduleData
| ProcessFinishData { cpu :: Integer
, diskReadBytes :: Integer
, diskWriteBytes :: Integer
, resultCode :: Integer } deriving (Generic, Show)
2017-02-23 00:34:28 +03:00
2017-03-01 02:46:29 +03:00
instance ToJSON ProcessData where
toJSON ProcessUpdateData{..} = object [ "git_dir" .= gitDir, "program" .= program, "repo_name" .= repoName, "real_ip" .= realIP, "repo_id" .= repoID, "user_id" .= userID, "via" .= via ]
toJSON ProcessScheduleData = object []
toJSON ProcessFinishData{..} = object [ "cpu" .= cpu, "disk_read_bytes" .= diskReadBytes, "disk_write_bytes" .= diskWriteBytes, "result_code" .= resultCode ]
2017-02-15 04:42:35 +03:00
2017-02-24 01:04:24 +03:00
2017-02-15 04:42:35 +03:00
data GitmonCommand = Update
| Finish
| Schedule deriving (Generic, Show)
2017-02-15 04:42:35 +03:00
instance ToJSON GitmonCommand where
toJSON = genericToJSON defaultOptions { constructorTagModifier = unpack . toLower . pack }
2017-02-15 04:42:35 +03:00
2017-02-24 01:04:24 +03:00
2017-03-03 00:35:51 +03:00
data GitmonMsg = GitmonMsg { command :: GitmonCommand
, processData :: ProcessData } deriving (Show)
2017-02-15 04:42:35 +03:00
instance ToJSON GitmonMsg where
toJSON GitmonMsg{..} = case command of
Update -> object ["command" .= ("update" :: String), "data" .= processData]
Finish -> object ["command" .= ("finish" :: String), "data" .= processData]
Schedule -> object ["command" .= ("schedule" :: String)]
2017-03-02 01:06:58 +03:00
2017-03-03 01:05:50 +03:00
type ProcInfo = Either Y.ParseException (Maybe ProcIO)
newtype SocketFactory = SocketFactory { withSocket :: forall a. (Socket -> IO a) -> IO a }
2017-03-07 00:51:38 +03:00
reportGitmon :: String -> ReaderT LgRepo IO a -> ReaderT LgRepo IO a
reportGitmon = reportGitmon' SocketFactory { withSocket = withGitmonSocket }
reportGitmon' :: SocketFactory -> String -> ReaderT LgRepo IO a -> ReaderT LgRepo IO a
reportGitmon' SocketFactory{..} program gitCommand =
join . liftIO . withSocket $ \socket' -> do
(gitDir, realIP, repoName, repoID, userID) <- loadEnvVars
safeGitmonIO . sendAll socket' $ processJSON Update (ProcessUpdateData gitDir program realIP repoName repoID userID "semantic-diff")
safeGitmonIO . sendAll socket' $ processJSON Schedule ProcessScheduleData
gitmonStatus <- safeGitmonIO $ recv socket' 1024
(startTime, beforeProcIOContents) <- collectStats
-- | We are eagerly evaluating the gitCommand with BangPatterns. This is to preserve accuracy in measuring the process stats calculated, in particular disk read bytes.
let !result = withGitmonStatus gitmonStatus gitCommand
(afterTime, afterProcIOContents) <- collectStats
let (cpuTime, diskReadBytes, diskWriteBytes, resultCode) = procStats startTime afterTime beforeProcIOContents afterProcIOContents
safeGitmonIO . sendAll socket' $ processJSON Finish (ProcessFinishData cpuTime diskReadBytes diskWriteBytes resultCode)
pure result
2017-03-07 00:51:38 +03:00
where
2017-03-08 21:49:44 +03:00
withGitmonStatus :: Maybe ByteString -> ReaderT LgRepo IO a -> ReaderT LgRepo IO a
withGitmonStatus maybeGitmonStatus gitCommand = case maybeGitmonStatus of
Just gitmonStatus | "fail" `isInfixOf` decodeUtf8 gitmonStatus -> throwGitmonException gitmonStatus
_ -> gitCommand
throwGitmonException :: ByteString -> e
2017-03-08 22:10:57 +03:00
throwGitmonException command = throw . GitmonException . unpack $ "Received: '" <> decodeUtf8 command <> "' from Gitmon"
2017-03-08 21:49:44 +03:00
2017-03-07 00:51:38 +03:00
collectStats :: IO (TimeSpec, ProcInfo)
collectStats = do
time <- getTime clock
procIOContents <- Y.decodeFileEither procFileAddr :: IO ProcInfo
pure (time, procIOContents)
procStats :: TimeSpec -> TimeSpec -> ProcInfo -> ProcInfo -> ( Integer, Integer, Integer, Integer )
procStats beforeTime afterTime beforeProcIOContents afterProcIOContents = ( cpuTime, diskReadBytes, diskWriteBytes, resultCode )
where
-- | toNanoSecs converts TimeSpec to Integer, and we further convert this value to milliseconds (expected by Gitmon).
cpuTime = div (1 * 1000 * 1000) . toNanoSecs $ afterTime - beforeTime
2017-03-07 00:51:38 +03:00
beforeDiskReadBytes = either (const 0) (maybe 0 read_bytes) beforeProcIOContents
afterDiskReadBytes = either (const 0) (maybe 0 read_bytes) afterProcIOContents
beforeDiskWriteBytes = either (const 0) (maybe 0 write_bytes) beforeProcIOContents
afterDiskWriteBytes = either (const 0) (maybe 0 write_bytes) afterProcIOContents
diskReadBytes = afterDiskReadBytes - beforeDiskReadBytes
diskWriteBytes = afterDiskWriteBytes - beforeDiskWriteBytes
resultCode = 0
loadEnvVars :: IO (String, Maybe String, Maybe String, Maybe Int, Maybe Int)
2017-03-07 00:51:38 +03:00
loadEnvVars = do
pwd <- safeGetCurrentDirectory
2017-03-07 00:51:38 +03:00
gitDir <- fromMaybe pwd <$> lookupEnv "GIT_DIR"
realIP <- lookupEnv "GIT_SOCKSTAT_VAR_real_ip"
repoName <- lookupEnv "GIT_SOCKSTAT_VAR_repo_name"
repoID <- lookupEnv "GIT_SOCKSTAT_VAR_repo_id"
userID <- lookupEnv "GIT_SOCKSTAT_VAR_user_id"
pure (gitDir, realIP, repoName, readIntFromEnv repoID, readIntFromEnv userID)
where
safeGetCurrentDirectory :: IO String
safeGetCurrentDirectory = getCurrentDirectory `catch` handleIOException
handleIOException :: IOException -> IO String
handleIOException _ = pure ""
2017-03-07 00:51:38 +03:00
readIntFromEnv :: Maybe String -> Maybe Int
readIntFromEnv Nothing = Nothing
readIntFromEnv (Just s) = readInt $ matchRegex regex s
where
-- | Expected format for userID and repoID is: "uint:123",
-- | where "uint:" indicates an unsigned integer followed by an integer value.
regex :: Regex
regex = mkRegex "^uint:([0-9]+)$"
readInt :: Maybe [String] -> Maybe Int
readInt (Just [s]) = Just (read s :: Int)
readInt _ = Nothing
withGitmonSocket :: (Socket -> IO c) -> IO c
withGitmonSocket = bracket connectSocket close
2017-03-07 00:51:38 +03:00
where
connectSocket = do
s <- socket AF_UNIX Stream defaultProtocol
2017-03-08 22:35:06 +03:00
timeout gitmonTimeout $ connect s (SockAddrUnix gitmonSocketAddr)
2017-03-07 00:51:38 +03:00
pure s
2017-03-03 01:05:50 +03:00
2017-03-03 00:35:51 +03:00
-- Timeout in nanoseconds to wait before giving up on Gitmon response to schedule.
gitmonTimeout :: Int
gitmonTimeout = 1 * 1000 * 1000
gitmonSocketAddr :: String
gitmonSocketAddr = "/tmp/gitstats.sock"
safeGitmonIO :: MonadIO m => IO a -> m (Maybe a)
safeGitmonIO command = liftIO $ timeout gitmonTimeout command `catch` noop
noop :: IOException -> IO (Maybe a)
noop _ = pure Nothing
2017-03-03 00:35:51 +03:00
procFileAddr :: String
procFileAddr = "/proc/self/io"
clock :: Clock
clock = Realtime
processJSON :: GitmonCommand -> ProcessData -> ByteString
processJSON command processData = (toStrict . encode $ GitmonMsg command processData) <> "\n"