1
1
mirror of https://github.com/github/semantic.git synced 2024-12-22 14:21:31 +03:00
semantic/src/GitmonClient.hs
Rick Winfrey c7f6b25723 Swallow errors for now
- Outputting IO exceptions in the GitmonClient are causing problems for
profiling work.
2017-03-28 10:32:44 -07:00

169 lines
7.4 KiB
Haskell

-- | We use BangPatterns to force evaluation of git operations to preserve accuracy in measuring system stats (particularly disk read bytes)
{-# LANGUAGE RecordWildCards, DeriveGeneric, RankNTypes, BangPatterns #-}
module GitmonClient where
import Control.Exception (throw)
import Data.Aeson
import Data.Aeson.Types
import Data.ByteString.Lazy (toStrict)
import Data.Char (toLower)
import Data.Text (unpack, isInfixOf)
import qualified Data.Yaml as Y
import GHC.Generics
import Git.Libgit2
import Network.Socket hiding (recv)
import Network.Socket.ByteString (sendAll, recv)
import Prelude
import Prologue hiding (toStrict, map, print, show)
import System.Clock
import System.Environment
import System.Timeout
import Text.Regex
newtype GitmonException = GitmonException String deriving (Show, Typeable)
instance Exception GitmonException
data ProcIO = ProcIO { readBytes :: Integer
, writeBytes :: Integer } deriving (Show, Generic)
instance FromJSON ProcIO
instance ToJSON ProcIO where
toJSON ProcIO{..} = object [ "read_bytes" .= readBytes, "write_bytes" .= writeBytes ]
data ProcessData = ProcessUpdateData { gitDir :: Maybe String
, program :: String
, realIP :: Maybe String
, repoName :: Maybe String
, repoID :: Maybe Int
, userID :: Maybe Int
, via :: String }
| ProcessScheduleData
| ProcessFinishData { cpu :: Integer
, diskReadBytes :: Integer
, diskWriteBytes :: Integer
, resultCode :: Integer } deriving (Generic, Show)
instance ToJSON ProcessData where
toJSON ProcessUpdateData{..} = object [ "git_dir" .= gitDir, "program" .= program, "repo_name" .= repoName, "real_ip" .= realIP, "repo_id" .= repoID, "user_id" .= userID, "via" .= via ]
toJSON ProcessScheduleData = object []
toJSON ProcessFinishData{..} = object [ "cpu" .= cpu, "disk_read_bytes" .= diskReadBytes, "disk_write_bytes" .= diskWriteBytes, "result_code" .= resultCode ]
data GitmonCommand = Update
| Finish
| Schedule deriving (Generic, Show)
instance ToJSON GitmonCommand where
toJSON = genericToJSON defaultOptions { constructorTagModifier = map toLower }
data GitmonMsg = GitmonMsg { command :: GitmonCommand
, processData :: ProcessData } deriving (Show)
instance ToJSON GitmonMsg where
toJSON GitmonMsg{..} = case command of
Update -> object ["command" .= ("update" :: String), "data" .= processData]
Finish -> object ["command" .= ("finish" :: String), "data" .= processData]
Schedule -> object ["command" .= ("schedule" :: String)]
type ProcInfo = Either Y.ParseException (Maybe ProcIO)
newtype SocketFactory = SocketFactory { withSocket :: forall a. (Socket -> IO a) -> IO a }
reportGitmon :: String -> ReaderT LgRepo IO a -> ReaderT LgRepo IO a
reportGitmon = reportGitmon' SocketFactory { withSocket = withGitmonSocket }
reportGitmon' :: SocketFactory -> String -> ReaderT LgRepo IO a -> ReaderT LgRepo IO a
reportGitmon' SocketFactory{..} program gitCommand =
join . liftIO . withSocket $ \sock -> do
[gitDir, realIP, repoName, repoID, userID] <- traverse lookupEnv ["GIT_DIR", "GIT_SOCKSTAT_VAR_real_ip", "GIT_SOCKSTAT_VAR_repo_name", "GIT_SOCKSTAT_VAR_repo_id", "GIT_SOCKSTAT_VAR_user_id"]
void . safeGitmonIO . sendAll sock $ processJSON Update (ProcessUpdateData gitDir program realIP repoName (readIntFromEnv repoID) (readIntFromEnv userID) "semantic-diff")
void . safeGitmonIO . sendAll sock $ processJSON Schedule ProcessScheduleData
gitmonStatus <- safeGitmonIO $ recv sock 1024
(startTime, beforeProcIOContents) <- collectStats
-- | The result of the gitCommand is strictly evaluated (to next normal form). This is not equivalent to a `deepseq`. The underlying `Git.Types` do not have instances of `NFData` preventing us from using `deepseq` at this time.
let !result = withGitmonStatus gitmonStatus gitCommand
(afterTime, afterProcIOContents) <- collectStats
let (cpuTime, diskReadBytes, diskWriteBytes, resultCode) = procStats startTime afterTime beforeProcIOContents afterProcIOContents
void . safeGitmonIO . sendAll sock $ processJSON Finish (ProcessFinishData cpuTime diskReadBytes diskWriteBytes resultCode)
pure result
where
withGitmonStatus :: Maybe ByteString -> ReaderT LgRepo IO a -> ReaderT LgRepo IO a
withGitmonStatus maybeGitmonStatus gitCommand = case maybeGitmonStatus of
Just gitmonStatus | "fail" `isInfixOf` decodeUtf8 gitmonStatus -> throwGitmonException gitmonStatus
_ -> gitCommand
throwGitmonException :: ByteString -> e
throwGitmonException command = throw . GitmonException . unpack $ "Received: '" <> decodeUtf8 command <> "' from Gitmon"
collectStats :: IO (TimeSpec, ProcInfo)
collectStats = do
time <- getTime clock
procIOContents <- Y.decodeFileEither procFileAddr :: IO ProcInfo
pure (time, procIOContents)
procStats :: TimeSpec -> TimeSpec -> ProcInfo -> ProcInfo -> ( Integer, Integer, Integer, Integer )
procStats beforeTime afterTime beforeProcIOContents afterProcIOContents = ( cpuTime, diskReadBytes, diskWriteBytes, resultCode )
where
-- | toNanoSecs converts TimeSpec to Integer, and we further convert this value to milliseconds (expected by Gitmon).
cpuTime = div (1 * 1000 * 1000) . toNanoSecs $ afterTime - beforeTime
beforeDiskReadBytes = either (const 0) (maybe 0 readBytes) beforeProcIOContents
afterDiskReadBytes = either (const 0) (maybe 0 readBytes) afterProcIOContents
beforeDiskWriteBytes = either (const 0) (maybe 0 writeBytes) beforeProcIOContents
afterDiskWriteBytes = either (const 0) (maybe 0 writeBytes) afterProcIOContents
diskReadBytes = afterDiskReadBytes - beforeDiskReadBytes
diskWriteBytes = afterDiskWriteBytes - beforeDiskWriteBytes
resultCode = 0
readIntFromEnv :: Maybe String -> Maybe Int
readIntFromEnv Nothing = Nothing
readIntFromEnv (Just s) = readInt $ matchRegex regex s
where
-- | Expected format for userID and repoID is: "uint:123",
-- where "uint:" indicates an unsigned integer followed by an integer value.
regex :: Regex
regex = mkRegexWithOpts "^uint:([0-9]+)$" False True
readInt :: Maybe [String] -> Maybe Int
readInt (Just [s]) = Just (read s :: Int)
readInt _ = Nothing
withGitmonSocket :: (Socket -> IO c) -> IO c
withGitmonSocket = bracket connectSocket close
where
connectSocket = do
s <- socket AF_UNIX Stream defaultProtocol
void . safeGitmonIO $ connect s (SockAddrUnix gitmonSocketAddr)
pure s
-- | Timeout in nanoseconds to wait before giving up on Gitmon response to schedule.
gitmonTimeout :: Int
gitmonTimeout = 1 * 1000 * 1000
gitmonSocketAddr :: String
gitmonSocketAddr = "/tmp/gitstats.sock"
safeGitmonIO :: MonadIO m => IO a -> m (Maybe a)
safeGitmonIO command = liftIO $ timeout gitmonTimeout command `catch` logError
logError :: IOException -> IO (Maybe a)
logError _ = pure Nothing
procFileAddr :: String
procFileAddr = "/proc/self/io"
clock :: Clock
clock = Realtime
processJSON :: GitmonCommand -> ProcessData -> ByteString
processJSON command processData = toStrict . encode $ GitmonMsg command processData