1
1
mirror of https://github.com/github/semantic.git synced 2024-12-22 22:31:36 +03:00
semantic/src/GitmonClient.hs

169 lines
7.4 KiB
Haskell
Raw Normal View History

2017-03-28 00:15:58 +03:00
-- | We use BangPatterns to force evaluation of git operations to preserve accuracy in measuring system stats (particularly disk read bytes)
{-# LANGUAGE RecordWildCards, DeriveGeneric, RankNTypes, BangPatterns #-}
2017-02-15 04:42:35 +03:00
module GitmonClient where
2017-03-08 21:30:32 +03:00
import Control.Exception (throw)
2017-02-15 04:42:35 +03:00
import Data.Aeson
import Data.Aeson.Types
2017-02-24 01:03:26 +03:00
import Data.ByteString.Lazy (toStrict)
2017-03-24 23:14:54 +03:00
import Data.Char (toLower)
2017-03-27 20:11:09 +03:00
import Data.Text (unpack, isInfixOf)
2017-03-01 02:27:33 +03:00
import qualified Data.Yaml as Y
2017-02-24 01:03:26 +03:00
import GHC.Generics
2017-02-22 22:15:58 +03:00
import Git.Libgit2
2017-03-03 00:16:38 +03:00
import Network.Socket hiding (recv)
import Network.Socket.ByteString (sendAll, recv)
2017-02-24 01:03:26 +03:00
import Prelude
import Prologue hiding (toStrict, map, print, show)
import System.Clock
2017-03-01 02:27:33 +03:00
import System.Environment
import System.Timeout
import Text.Regex
2017-02-15 04:42:35 +03:00
2017-03-08 21:30:32 +03:00
newtype GitmonException = GitmonException String deriving (Show, Typeable)
instance Exception GitmonException
2017-03-27 20:21:28 +03:00
data ProcIO = ProcIO { readBytes :: Integer
, writeBytes :: Integer } deriving (Show, Generic)
instance FromJSON ProcIO
2017-03-27 20:21:28 +03:00
instance ToJSON ProcIO where
toJSON ProcIO{..} = object [ "read_bytes" .= readBytes, "write_bytes" .= writeBytes ]
2017-03-28 19:41:16 +03:00
data ProcessData = ProcessUpdateData { gitDir :: Maybe String
2017-03-03 00:35:51 +03:00
, program :: String
, realIP :: Maybe String
, repoName :: Maybe String
, repoID :: Maybe Int
, userID :: Maybe Int
2017-03-03 00:35:51 +03:00
, via :: String }
| ProcessScheduleData
| ProcessFinishData { cpu :: Integer
, diskReadBytes :: Integer
, diskWriteBytes :: Integer
, resultCode :: Integer } deriving (Generic, Show)
2017-02-23 00:34:28 +03:00
2017-03-01 02:46:29 +03:00
instance ToJSON ProcessData where
toJSON ProcessUpdateData{..} = object [ "git_dir" .= gitDir, "program" .= program, "repo_name" .= repoName, "real_ip" .= realIP, "repo_id" .= repoID, "user_id" .= userID, "via" .= via ]
2017-03-28 19:44:19 +03:00
toJSON ProcessScheduleData = object []
toJSON ProcessFinishData{..} = object [ "cpu" .= cpu, "disk_read_bytes" .= diskReadBytes, "disk_write_bytes" .= diskWriteBytes, "result_code" .= resultCode ]
2017-02-15 04:42:35 +03:00
2017-02-24 01:04:24 +03:00
2017-02-15 04:42:35 +03:00
data GitmonCommand = Update
| Finish
| Schedule deriving (Generic, Show)
2017-02-15 04:42:35 +03:00
instance ToJSON GitmonCommand where
2017-03-24 23:14:54 +03:00
toJSON = genericToJSON defaultOptions { constructorTagModifier = map toLower }
2017-02-15 04:42:35 +03:00
2017-02-24 01:04:24 +03:00
2017-03-03 00:35:51 +03:00
data GitmonMsg = GitmonMsg { command :: GitmonCommand
, processData :: ProcessData } deriving (Show)
2017-02-15 04:42:35 +03:00
instance ToJSON GitmonMsg where
toJSON GitmonMsg{..} = case command of
Update -> object ["command" .= ("update" :: String), "data" .= processData]
Finish -> object ["command" .= ("finish" :: String), "data" .= processData]
Schedule -> object ["command" .= ("schedule" :: String)]
2017-03-02 01:06:58 +03:00
2017-03-03 01:05:50 +03:00
type ProcInfo = Either Y.ParseException (Maybe ProcIO)
newtype SocketFactory = SocketFactory { withSocket :: forall a. (Socket -> IO a) -> IO a }
2017-03-07 00:51:38 +03:00
reportGitmon :: String -> ReaderT LgRepo IO a -> ReaderT LgRepo IO a
reportGitmon = reportGitmon' SocketFactory { withSocket = withGitmonSocket }
reportGitmon' :: SocketFactory -> String -> ReaderT LgRepo IO a -> ReaderT LgRepo IO a
reportGitmon' SocketFactory{..} program gitCommand =
2017-03-24 21:29:38 +03:00
join . liftIO . withSocket $ \sock -> do
2017-03-28 19:41:16 +03:00
[gitDir, realIP, repoName, repoID, userID] <- traverse lookupEnv ["GIT_DIR", "GIT_SOCKSTAT_VAR_real_ip", "GIT_SOCKSTAT_VAR_repo_name", "GIT_SOCKSTAT_VAR_repo_id", "GIT_SOCKSTAT_VAR_user_id"]
void . safeGitmonIO . sendAll sock $ processJSON Update (ProcessUpdateData gitDir program realIP repoName (readIntFromEnv repoID) (readIntFromEnv userID) "semantic-diff")
2017-03-24 21:29:38 +03:00
void . safeGitmonIO . sendAll sock $ processJSON Schedule ProcessScheduleData
gitmonStatus <- safeGitmonIO $ recv sock 1024
(startTime, beforeProcIOContents) <- collectStats
2017-03-28 00:15:58 +03:00
-- | The result of the gitCommand is strictly evaluated (to next normal form). This is not equivalent to a `deepseq`. The underlying `Git.Types` do not have instances of `NFData` preventing us from using `deepseq` at this time.
let !result = withGitmonStatus gitmonStatus gitCommand
(afterTime, afterProcIOContents) <- collectStats
let (cpuTime, diskReadBytes, diskWriteBytes, resultCode) = procStats startTime afterTime beforeProcIOContents afterProcIOContents
2017-03-24 21:29:38 +03:00
void . safeGitmonIO . sendAll sock $ processJSON Finish (ProcessFinishData cpuTime diskReadBytes diskWriteBytes resultCode)
pure result
2017-03-07 00:51:38 +03:00
where
2017-03-08 21:49:44 +03:00
withGitmonStatus :: Maybe ByteString -> ReaderT LgRepo IO a -> ReaderT LgRepo IO a
withGitmonStatus maybeGitmonStatus gitCommand = case maybeGitmonStatus of
Just gitmonStatus | "fail" `isInfixOf` decodeUtf8 gitmonStatus -> throwGitmonException gitmonStatus
_ -> gitCommand
throwGitmonException :: ByteString -> e
2017-03-08 22:10:57 +03:00
throwGitmonException command = throw . GitmonException . unpack $ "Received: '" <> decodeUtf8 command <> "' from Gitmon"
2017-03-08 21:49:44 +03:00
2017-03-07 00:51:38 +03:00
collectStats :: IO (TimeSpec, ProcInfo)
collectStats = do
time <- getTime clock
procIOContents <- Y.decodeFileEither procFileAddr :: IO ProcInfo
pure (time, procIOContents)
procStats :: TimeSpec -> TimeSpec -> ProcInfo -> ProcInfo -> ( Integer, Integer, Integer, Integer )
procStats beforeTime afterTime beforeProcIOContents afterProcIOContents = ( cpuTime, diskReadBytes, diskWriteBytes, resultCode )
where
-- | toNanoSecs converts TimeSpec to Integer, and we further convert this value to milliseconds (expected by Gitmon).
cpuTime = div (1 * 1000 * 1000) . toNanoSecs $ afterTime - beforeTime
2017-03-27 20:21:28 +03:00
beforeDiskReadBytes = either (const 0) (maybe 0 readBytes) beforeProcIOContents
afterDiskReadBytes = either (const 0) (maybe 0 readBytes) afterProcIOContents
beforeDiskWriteBytes = either (const 0) (maybe 0 writeBytes) beforeProcIOContents
afterDiskWriteBytes = either (const 0) (maybe 0 writeBytes) afterProcIOContents
2017-03-07 00:51:38 +03:00
diskReadBytes = afterDiskReadBytes - beforeDiskReadBytes
diskWriteBytes = afterDiskWriteBytes - beforeDiskWriteBytes
resultCode = 0
2017-03-28 19:41:16 +03:00
readIntFromEnv :: Maybe String -> Maybe Int
readIntFromEnv Nothing = Nothing
readIntFromEnv (Just s) = readInt $ matchRegex regex s
where
2017-03-28 19:41:16 +03:00
-- | Expected format for userID and repoID is: "uint:123",
-- where "uint:" indicates an unsigned integer followed by an integer value.
regex :: Regex
regex = mkRegexWithOpts "^uint:([0-9]+)$" False True
readInt :: Maybe [String] -> Maybe Int
readInt (Just [s]) = Just (read s :: Int)
readInt _ = Nothing
withGitmonSocket :: (Socket -> IO c) -> IO c
withGitmonSocket = bracket connectSocket close
2017-03-07 00:51:38 +03:00
where
connectSocket = do
s <- socket AF_UNIX Stream defaultProtocol
2017-03-27 20:11:30 +03:00
void . safeGitmonIO $ connect s (SockAddrUnix gitmonSocketAddr)
2017-03-07 00:51:38 +03:00
pure s
2017-03-03 01:05:50 +03:00
2017-03-28 00:16:08 +03:00
-- | Timeout in nanoseconds to wait before giving up on Gitmon response to schedule.
2017-03-03 00:35:51 +03:00
gitmonTimeout :: Int
gitmonTimeout = 1 * 1000 * 1000
gitmonSocketAddr :: String
gitmonSocketAddr = "/tmp/gitstats.sock"
safeGitmonIO :: MonadIO m => IO a -> m (Maybe a)
safeGitmonIO command = liftIO $ timeout gitmonTimeout command `catch` logError
logError :: IOException -> IO (Maybe a)
logError _ = pure Nothing
2017-03-03 00:35:51 +03:00
procFileAddr :: String
procFileAddr = "/proc/self/io"
clock :: Clock
clock = Realtime
processJSON :: GitmonCommand -> ProcessData -> ByteString
2017-03-27 20:21:28 +03:00
processJSON command processData = toStrict . encode $ GitmonMsg command processData
2017-03-03 00:35:51 +03:00