diff --git a/.gitignore b/.gitignore index 3a1149cf..e8e8e452 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ hie.yaml .stack-work/ .stack-work-test/ +.stack-work-bench/ stack.yaml.lock .hie/ stan.html diff --git a/scripts/benchmark-against-parent.sh b/scripts/benchmark-against-parent.sh index bc2dfc34..d530d115 100755 --- a/scripts/benchmark-against-parent.sh +++ b/scripts/benchmark-against-parent.sh @@ -16,7 +16,8 @@ fi BASELINE_OUTPUT=baseline.csv git checkout HEAD~ -STACK_WORK=.stack-work-bench stack bench swarm:benchmark --benchmark-arguments "--csv $BASELINE_OUTPUT --color always" + +scripts/run-benchmarks.sh "--csv $BASELINE_OUTPUT" git switch - -STACK_WORK=.stack-work-bench stack bench swarm:benchmark --benchmark-arguments "--baseline $BASELINE_OUTPUT --fail-if-slower 3 --color always" \ No newline at end of file +scripts/run-benchmarks.sh "--baseline $BASELINE_OUTPUT --fail-if-slower 3" \ No newline at end of file diff --git a/scripts/run-benchmarks.sh b/scripts/run-benchmarks.sh new file mode 100755 index 00000000..1a5daba4 --- /dev/null +++ b/scripts/run-benchmarks.sh @@ -0,0 +1,7 @@ +#!/bin/bash -xe + + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +cd $SCRIPT_DIR/.. + +STACK_WORK=.stack-work-bench stack bench swarm:benchmark --benchmark-arguments "--color always $@" diff --git a/swarm.cabal b/swarm.cabal index d3eef3ef..03e17946 100644 --- a/swarm.cabal +++ b/swarm.cabal @@ -798,8 +798,10 @@ benchmark benchmark type: exitcode-stdio-1.0 build-depends: tasty-bench >= 0.3.1 && < 0.4, base, + extra, lens, mtl, + text, swarm:swarm-engine, swarm:swarm-util, swarm:swarm-lang, diff --git a/test/bench/Benchmark.hs b/test/bench/Benchmark.hs index fc15d4ff..c77e6db7 100644 --- a/test/bench/Benchmark.hs +++ b/test/bench/Benchmark.hs @@ -11,6 +11,8 @@ import Control.Monad (replicateM_) import Control.Monad.State (evalStateT, execStateT) import Data.Map qualified as M import Data.Sequence (Seq) +import Data.Text qualified as T +import Data.Tuple.Extra (dupe) import Swarm.Effect (runTimeIO) import Swarm.Game.CESK (emptyStore, initMachine) import Swarm.Game.Display (defaultRobotDisplay) @@ -30,6 +32,7 @@ import Swarm.Language.Context qualified as Context import Swarm.Language.Pipeline (ProcessedTerm) import Swarm.Language.Pipeline.QQ (tmQ) import Swarm.Language.Syntax +import Swarm.Util (parens, showT) import Swarm.Util.Effect (simpleErrorHandle) import Swarm.Util.Erasable import Test.Tasty.Bench (Benchmark, bcompare, bench, bgroup, defaultMain, whnfAppIO) @@ -157,38 +160,56 @@ runGame numGameTicks = evalStateT (replicateM_ numGameTicks $ runTimeIO gameTick main :: IO () main = do - idlers <- mkGameStates idleProgram - trees <- mkGameStates treeProgram - circlers <- mkGameStates circlerProgram - movers <- mkGameStates moverProgram - wavesInlined <- mkGameStates (waveProgram True) - wavesWithDef <- mkGameStates (waveProgram False) + idlers <- mkGameStates largeRobotNumbers idleProgram + trees <- mkGameStates robotNumbers treeProgram + circlers <- mkGameStates robotNumbers circlerProgram + movers <- mkGameStates robotNumbers moverProgram + wavesInlined <- mkGameStates robotNumbers $ waveProgram True + wavesWithDef <- mkGameStates robotNumbers $ waveProgram False -- In theory we should force the evaluation of these game states to normal -- form before running the benchmarks. In practice, the first of the many -- criterion runs for each of these benchmarks doesn't look like an outlier. defaultMain [ bgroup "run 1000 game ticks" - [ bgroup "idlers" (toBenchmarks idlers) - , bgroup "trees" (toBenchmarks trees) - , bgroup "circlers" (toBenchmarks circlers) - , bgroup "movers" (toBenchmarks movers) - , bgroup "wavesInlined" (toBenchmarks wavesInlined) + [ bgroupTicks "idlers" 10000 idlers + , bgroupTicks "trees" 1000 trees + , bgroupTicks "circlers" 1000 circlers + , bgroupTicks "movers" 1000 movers , bgroup - "wavesWithDef" - ( zipWith (\i -> bcompare ("wavesInlined." <> show i)) robotNumbers $ - toBenchmarks wavesWithDef - ) + "waves comparison" + [ bgroup "wavesInlined" (toBenchmarks 1000 wavesInlined) + , bgroup + "wavesWithDef" + ( zipWith (\i -> bcompare ("wavesInlined." <> show i)) robotNumbers $ + toBenchmarks 1000 wavesWithDef + ) + ] ] ] where + bgroupTicks label ticks bots = + bgroup newLabel $ toBenchmarks ticks bots + where + newLabel = + unwords + [ label + , T.unpack $ + parens $ + T.unwords + [ showT ticks + , "ticks" + ] + ] + robotNumbers = [10, 20 .. 40] + largeRobotNumbers = take 4 $ iterate (* 2) 100 - mkGameStates :: ProcessedTerm -> IO [(Int, GameState)] - mkGameStates prog = zip robotNumbers <$> mapM (mkGameState prog $ initRobot prog) robotNumbers + mkGameStates :: [Int] -> ProcessedTerm -> IO [(Int, GameState)] + mkGameStates botCounts prog = mapM (traverse (mkGameState prog $ initRobot prog) . dupe) botCounts - toBenchmarks :: [(Int, GameState)] -> [Benchmark] - toBenchmarks gameStates = - [ bench (show n) $ whnfAppIO (runGame 1000) gs + toBenchmarks :: Int -> [(Int, GameState)] -> [Benchmark] + toBenchmarks tickCount gameStates = + [ bench (show n) $ whnfAppIO (runGame tickCount) gs | (n, gs) <- gameStates ]