king: add partial-replay and --dry-from

partial-replay allows you to replay up to event N and make a checkpoint
there.  --dry-from replays to event N and then starts in dry-run mode
from that point, so you can attach a console.

The intended debug flow is to use partial-replay to make a checkpoint
near when a bug is triggered, then use event-browser and --dry-from to
drill down and find exactly what triggered the bug.

Unfortunately, if the bug was triggered by crash handling, you do not
have the original event, only the injected replacement event.  This is
still useful, though.
This commit is contained in:
Philip Monk 2020-01-10 14:07:29 -08:00
parent 06934959ca
commit 8a8341a783
No known key found for this signature in database
GPG Key ID: B66E1F02604E44EC
5 changed files with 112 additions and 29 deletions

View File

@ -18,6 +18,7 @@ data Opts = Opts
, oHashless :: Bool
, oExit :: Bool
, oDryRun :: Bool
, oDryFrom :: Maybe Word64
, oVerbose :: Bool
, oAmesPort :: Maybe Word16
, oTrace :: Bool
@ -75,6 +76,10 @@ data Bug
, bFirstEvt :: Word64
, bFinalEvt :: Word64
}
| ReplayEvents
{ bPierPath :: FilePath
, bFinalEvt :: Word64
}
| CheckDawn
{ bKeyfilePath :: FilePath
}
@ -199,11 +204,10 @@ new = do
opts :: Parser Opts
opts = do
oAmesPort <- option auto $ metavar "PORT"
oAmesPort <- optional $ option auto $ metavar "PORT"
<> short 'p'
<> long "ames"
<> help "Ames port number"
<> value Nothing
<> hidden
-- Always disable hashboard. Right now, urbit is almost unusable with this
@ -233,10 +237,15 @@ opts = do
<> help "Persist no events and turn off Ames networking"
<> hidden
oTrace <- switch $ short 't'
<> long "trace"
<> help "Enable tracing"
<> hidden
oDryFrom <- optional $ option auto $ metavar "EVENT"
<> long "dry-from"
<> help "Dry run from event number"
<> hidden
oTrace <- switch $ short 't'
<> long "trace"
<> help "Enable tracing"
<> hidden
oLocalhost <- switch $ short 'L'
<> long "local"
@ -293,7 +302,7 @@ firstEv = option auto $ long "first"
lastEv :: Parser Word64
lastEv = option auto $ long "last"
<> metavar "LAS"
<> help "anding with event LAS"
<> help "ending with event LAS"
<> value maxBound
checkEvs :: Parser Bug
@ -302,6 +311,9 @@ checkEvs = ValidateEvents <$> pierPath <*> firstEv <*> lastEv
checkFx :: Parser Bug
checkFx = ValidateFX <$> pierPath <*> firstEv <*> lastEv
replayEvs :: Parser Bug
replayEvs = ReplayEvents <$> pierPath <*> lastEv
browseEvs :: Parser Bug
browseEvs = EventBrowser <$> pierPath
@ -331,6 +343,10 @@ bugCmd = fmap CmdBug
( info (checkFx <**> helper)
$ progDesc "Parse all data in event log"
)
<> command "partial-replay"
( info (replayEvs <**> helper)
$ progDesc "Replay up to N events"
)
<> command "dawn"
( info (checkDawn <**> helper)
$ progDesc "Test run dawn"

View File

@ -76,6 +76,7 @@ import Data.Default (def)
import King.App (runApp, runAppLogFile, runPierApp)
import King.App (HasConfigDir(..))
import RIO (logSticky, logStickyDone)
import System.Process (system)
import Text.Show.Pretty (pPrint)
import Urbit.Time (Wen)
import Vere.LockFile (lockFile)
@ -122,7 +123,7 @@ toSerfFlags CLI.Opts{..} = catMaybes m
, from oHashless Serf.Hashless
, from oQuiet Serf.Quiet
, from oVerbose Serf.Verbose
, from oDryRun Serf.DryRun
, from (oDryRun || isJust oDryFrom) Serf.DryRun
]
from True flag = Just flag
from False _ = Nothing
@ -131,12 +132,12 @@ toSerfFlags CLI.Opts{..} = catMaybes m
toPierConfig :: FilePath -> CLI.Opts -> PierConfig
toPierConfig pierPath CLI.Opts{..} = PierConfig
{ _pcPierPath = pierPath
, _pcDryRun = oDryRun
, _pcDryRun = (oDryRun || isJust oDryFrom)
}
toNetworkConfig :: CLI.Opts -> NetworkConfig
toNetworkConfig CLI.Opts{..} = NetworkConfig
{ ncNetworking = if oDryRun then NetworkNone
{ ncNetworking = if (oDryRun || isJust oDryFrom) then NetworkNone
else if oOffline then NetworkNone
else if oLocalhost then NetworkLocalhost
else NetworkNormal
@ -182,8 +183,8 @@ runOrExitImmediately getPier oExit =
tryPlayShip :: ( HasLogFunc e, HasNetworkConfig e, HasPierConfig e
, HasConfigDir e
)
=> Bool -> Bool -> Serf.Flags -> RIO e ()
tryPlayShip exitImmediately fullReplay flags = do
=> Bool -> Bool -> Maybe Word64 -> Serf.Flags -> RIO e ()
tryPlayShip exitImmediately fullReplay playFrom flags = do
when fullReplay wipeSnapshot
runOrExitImmediately resumeShip exitImmediately
where
@ -201,7 +202,7 @@ tryPlayShip exitImmediately fullReplay flags = do
resumeShip = do
view pierPathL >>= lockFile
rio $ logTrace "RESUMING SHIP"
sls <- Pier.resumed flags
sls <- Pier.resumed playFrom flags
rio $ logTrace "SHIP RESUMED"
pure sls
@ -283,6 +284,42 @@ collectAllFx top = do
--------------------------------------------------------------------------------
replayPartEvs :: e. HasLogFunc e => FilePath -> Word64 -> RIO e ()
replayPartEvs top last = do
logTrace $ display $ pack @Text top
fetchSnapshot
rwith replayedEvs $ \() ->
logTrace "Done replaying events!"
where
fetchSnapshot :: RIO e ()
fetchSnapshot = do
snap <- Pier.getSnapshot top last
case snap of
Nothing -> pure ()
Just sn -> do
let start = top <> "/.partial-replay/"
liftIO $ system $ "cp -r \"" <> sn <> "\" \"" <> tmpDir <> "\""
pure ()
tmpDir :: FilePath
tmpDir = top <> "/.partial-replay/" <> show last
replayedEvs :: RAcquire e ()
replayedEvs = do
lockFile top
log <- Log.existing (top <> "/.urb/log")
serf <- Serf.run (Serf.Config tmpDir serfFlags)
rio $ do
ss <- Serf.replay serf log $ Just last
Serf.snapshot serf ss
io $ threadDelay 500000 -- Copied from runOrExitImmediately
pure ()
serfFlags :: Serf.Flags
serfFlags = [Serf.Hashless]
--------------------------------------------------------------------------------
{-
Interesting
-}
@ -444,7 +481,8 @@ runShip (CLI.Run pierPath) opts = do
let pierConfig = toPierConfig pierPath opts
let networkConfig = toNetworkConfig opts
runPierApp pierConfig networkConfig $
tryPlayShip (CLI.oExit opts) (CLI.oFullReplay opts) (toSerfFlags opts)
tryPlayShip (CLI.oExit opts) (CLI.oFullReplay opts) (CLI.oDryFrom opts)
(toSerfFlags opts)
startBrowser :: HasLogFunc e => FilePath -> RIO e ()
@ -517,6 +555,7 @@ main = do
CLI.CmdBug (CLI.ValidatePill pax pil s) -> runApp $ testPill pax pil s
CLI.CmdBug (CLI.ValidateEvents pax f l) -> runApp $ checkEvs pax f l
CLI.CmdBug (CLI.ValidateFX pax f l) -> runApp $ checkFx pax f l
CLI.CmdBug (CLI.ReplayEvents pax l) -> runApp $ replayPartEvs pax l
CLI.CmdBug (CLI.CheckDawn pax) -> runApp $ checkDawn pax
CLI.CmdBug CLI.CheckComet -> runApp $ checkComet
CLI.CmdCon pier -> runAppLogFile $ connTerm pier

View File

@ -267,7 +267,6 @@ streamEvents :: HasLogFunc e
=> EventLog -> Word64
-> ConduitT () ByteString (RIO e) ()
streamEvents log first = do
last <- lift $ lastEv log
batch <- lift $ readBatch log first
unless (null batch) $ do
for_ batch yield

View File

@ -1,7 +1,7 @@
{-# OPTIONS_GHC -Wwarn #-}
module Vere.Pier
( booted, resumed, pier, runPersist, runCompute, generateBootSeq
( booted, resumed, getSnapshot, pier, runPersist, runCompute, generateBootSeq
) where
import UrbitPrelude
@ -11,6 +11,7 @@ import Config
import System.Random
import Vere.Pier.Types
import Data.List (uncons)
import Data.Text (append)
import King.App (HasConfigDir(..))
import System.Posix.Files (ownerModes, setFileMode)
@ -124,18 +125,36 @@ booted pill lite flags ship boot = do
-- Resume an existing ship. ----------------------------------------------------
resumed :: (HasPierConfig e, HasLogFunc e)
=> Serf.Flags
=> Maybe Word64 -> Serf.Flags
-> RAcquire e (Serf e, EventLog, SerfState)
resumed flags = do
resumed event flags = do
top <- view pierPathL
tap <- case event of
Nothing -> pure top
Just ev -> rio $ do
snap <- getSnapshot top ev
case snap of
Nothing -> pure top
Just sn -> pure sn
rio $ logTrace $ displayShow tap
log <- Log.existing (top <> "/.urb/log")
serf <- Serf.run (Serf.Config top flags)
serfSt <- rio $ Serf.replay serf log
serf <- Serf.run (Serf.Config tap flags)
serfSt <- rio $ Serf.replay serf log event
rio $ Serf.snapshot serf serfSt
pure (serf, log, serfSt)
getSnapshot :: String -> Word64 -> RIO e (Maybe String)
getSnapshot top last = do
createDirectoryIfMissing True $ top <> "/.partial-replay/"
snapshots <- listDirectory $ top <> "/.partial-replay/"
let nums = UrbitPrelude.mapMaybe readMay snapshots :: [Int]
let snaps = sort $ UrbitPrelude.filter (\n -> n <= (fromIntegral last)) nums
case (Data.List.uncons $ reverse snaps) of
Nothing -> pure Nothing
Just (sn, _) -> pure $ Just $ top <> "/.partial-replay/" <> (show sn)
-- Run Pier --------------------------------------------------------------------

View File

@ -29,12 +29,13 @@ import System.Exit (ExitCode)
import qualified Urbit.Ob as Ob
import qualified Data.ByteString.Unsafe as BS
import qualified Data.Text as T
import qualified System.IO as IO
import qualified System.IO.Error as IO
import qualified Urbit.Time as Time
import qualified Vere.Log as Log
import qualified Data.ByteString.Unsafe as BS
import qualified Data.Text as T
import qualified System.IO as IO
import qualified System.IO.Error as IO
import qualified Urbit.Time as Time
import qualified Vere.Log as Log
import qualified Data.Conduit.Combinators as CC
-- Serf Config -----------------------------------------------------------------
@ -437,12 +438,21 @@ replayJobs serf lastEv = go Nothing
updateProgressBar start msg
replay :: HasLogFunc e => Serf e -> Log.EventLog -> RIO e SerfState
replay serf log = do
replay :: HasLogFunc e
=> Serf e -> Log.EventLog -> Maybe Word64 -> RIO e SerfState
replay serf log last = do
ss <- handshake serf (Log.identity log)
lastEv <- Log.lastEv log
let numEvs = case last of
Nothing -> ssNextEv ss
Just la -> la - (ssNextEv ss) + 1
lastEv <- case last of
Nothing -> Log.lastEv log
Just la -> pure la
runConduit $ Log.streamEvents log (ssNextEv ss)
.| CC.take (fromIntegral numEvs)
.| toJobs (Log.identity log) (ssNextEv ss)
.| replayJobs serf (fromIntegral lastEv) ss