Compare commits

...

4 Commits

Author SHA1 Message Date
David Peter
c7be935da3 Improve plot_progression.py script 2024-02-02 17:51:31 +01:00
David Peter
a75dca90e1 Update execution-order illustration 2024-02-02 17:46:23 +01:00
Jack O'Connor
942719b0ed Update docs to reflect addition of --conclude
I'm not used to Inkscape, but I did my best to insert
the --conclude stage into the existing image without
changing it too much.
2024-02-02 17:46:23 +01:00
Jack O'Connor
e13d0cb144 Add --conclude option with tests
This implementation of --conclude is basically identical
to the existing implementation for --prepare. I have not
adapted any existing tests. All tests which test --conclude
functionality are new.

The --conclude commands will run in the same order in which
they are supplied on the CLI, in the same manner as --prepare.
There must either be a single --conclude command, or the same
number of --conclude commands as there are benchmark commands.
2024-02-02 17:46:23 +01:00
9 changed files with 1112 additions and 599 deletions
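As a side note on the behaviour described in the commit message for e13d0cb144, here is a minimal standalone Rust sketch of the "one --conclude, or one per command" selection rule. The function name and signature are illustrative only; the actual selection logic appears in the benchmark diff below.

fn conclusion_command_for<'a>(conclude_values: &'a [String], benchmark_index: usize) -> &'a str {
    if conclude_values.len() == 1 {
        // A single --conclude applies to every benchmarked command.
        &conclude_values[0]
    } else {
        // Otherwise there is exactly one --conclude per benchmarked command,
        // matched up in the order given on the command line.
        &conclude_values[benchmark_index]
    }
}

fn main() {
    let concludes = vec!["echo done".to_string()];
    assert_eq!(conclusion_command_for(&concludes, 0), "echo done");
    assert_eq!(conclusion_command_for(&concludes, 2), "echo done");
}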

Binary image file changed (diff not shown): 71 KiB before, 86 KiB after.

Image file changed (diff suppressed because it is too large): 34 KiB before, 48 KiB after.

View File

@@ -16,6 +16,8 @@ hyperfine \- command\-line benchmarking tool
.IR CMD ]
.RB [ \-\-prepare
.IR CMD ]
.RB [ \-\-conclude
.IR CMD ]
.RB [ \-\-cleanup
.IR CMD ]
.RB [ \-\-parameter\-scan
@@ -115,11 +117,20 @@ The \fB\-\-prepare\fR option can be specified once for all commands or multiple
once for each command. In the latter case, each preparation command will be
run prior to the corresponding benchmark command.
.HP
\fB\-C\fR, \fB\-\-conclude\fR \fICMD...\fP
.IP
Execute \fICMD\fP after each timing run. This is useful, for example, for killing
long-running processes started during the benchmark (e.g. a web server started in \fB\-\-prepare\fR).
The \fB\-\-conclude\fR option can be specified once for all commands or multiple times,
once for each command. In the latter case, each conclusion command will be
run after the corresponding benchmark command.
.HP
\fB\-c\fR, \fB\-\-cleanup\fR \fICMD...\fP
.IP
Execute \fICMD\fP after the completion of all benchmarking runs for each individual
command to be benchmarked. This is useful if the commands to be benchmarked
produce artifacts that need to be cleaned up.
produce artifacts that need to be cleaned up. It only runs once after a series of
benchmark runs, as opposed to the \fB\-\-conclude\fR option, which runs after
every run.
.HP
\fB\-P\fR, \fB\-\-parameter\-scan\fR \fIVAR\fP \fIMIN\fP \fIMAX\fP
.IP
@@ -335,12 +346,13 @@ Export the results of a parameter scan benchmark to a markdown table:
.fi
.RE
.LP
Demonstrate when each of \fB\-\-setup\fR, \fB\-\-prepare\fR, \fIcmd\fP and \fB\-\-cleanup\fR will run:
Demonstrate when each of \fB\-\-setup\fR, \fB\-\-prepare\fR, \fB\-\-conclude\fR, \fIcmd\fP and \fB\-\-cleanup\fR will run:
.RS
.nf
\fBhyperfine\fR \fB\-L\fR n 1,2 \fB\-r\fR 2 \fB\-\-show-output\fR \\
\fB\-\-setup\fR 'echo setup n={n}' \\
\fB\-\-prepare\fR 'echo prepare={n}' \\
\fB\-\-conclude\fR 'echo conclude={n}' \\
\fB\-\-cleanup\fR 'echo cleanup n={n}' \\
'echo command n={n}'
.fi
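For readers skimming the man-page example above, here is a minimal standalone Rust sketch of the per-command ordering that \-\-setup, \-\-prepare, \-\-conclude and \-\-cleanup imply. It is illustrative only, not hyperfine's actual code; warmup runs follow the same prepare/command/conclude pattern, as the execution-order tests further down show.

fn run(step: &str) {
    // Stand-in for executing one of the user-supplied commands.
    println!("{step}");
}

fn benchmark_one_command(runs: u32) {
    run("setup");        // once per benchmarked command
    for _ in 0..runs {
        run("prepare");  // before every timing run
        run("command");  // the benchmarked command itself
        run("conclude"); // after every timing run (new in this change)
    }
    run("cleanup");      // once, after all runs of this command
}

fn main() {
    benchmark_one_command(2);
}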

View File

@@ -30,32 +30,48 @@ parser.add_argument(
metavar="num_runs",
help="Width of the moving-average window (default: N/5)",
)
parser.add_argument(
"--no-moving-average",
action="store_true",
help="Do not show moving average curve",
)
args = parser.parse_args()
with open(args.file) as f:
results = json.load(f)["results"]
label = results[0]["command"]
times = results[0]["times"]
num = len(times)
nums = range(num)
for result in results:
label = result["command"]
times = result["times"]
num = len(times)
nums = range(num)
plt.scatter(x=nums, y=times, marker=".", color="orange")
plt.ylim([0, None])
plt.xlim([-1, num])
plt.scatter(x=nums, y=times, marker=".")
plt.ylim([0, None])
plt.xlim([-1, num])
moving_average_width = (
num // 5 if args.moving_average_width is None else args.moving_average_width
)
if not args.no_moving_average:
moving_average_width = (
num // 5 if args.moving_average_width is None else args.moving_average_width
)
moving_average = moving_average(times, moving_average_width)
plt.plot(nums, moving_average, "-", color="blue")
average = moving_average(times, moving_average_width)
plt.plot(nums, average, "-")
if args.title:
plt.title(args.title)
plt.legend(labels=[label], loc="best", fontsize="medium")
legend = []
for result in results:
legend.append(result["command"])
if not args.no_moving_average:
legend.append("moving average")
plt.legend(legend)
plt.ylabel("Time [s]")
if args.output:
plt.savefig(args.output)
else:

View File

@@ -110,6 +110,14 @@ impl<'a> Benchmark<'a> {
self.run_intermediate_command(command, error_output)
}
/// Run the command specified by `--conclude`.
fn run_conclusion_command(&self, command: &Command<'_>) -> Result<TimingResult> {
let error_output = "The conclusion command terminated with a non-zero exit code. \
Append ' || true' to the command if you are sure that this can be ignored.";
self.run_intermediate_command(command, error_output)
}
/// Run the benchmark for a single command
pub fn run(&self) -> Result<BenchmarkResult> {
if self.options.output_style != OutputStyleOption::Disabled {
@@ -146,6 +154,25 @@ impl<'a> Benchmark<'a> {
.transpose()
};
let conclusion_command = self.options.conclusion_command.as_ref().map(|values| {
let conclusion_command = if values.len() == 1 {
&values[0]
} else {
&values[self.number]
};
Command::new_parametrized(
None,
conclusion_command,
self.command.get_parameters().iter().cloned(),
)
});
let run_conclusion_command = || {
conclusion_command
.as_ref()
.map(|cmd| self.run_conclusion_command(cmd))
.transpose()
};
self.run_setup_command(self.command.get_parameters().iter().cloned())?;
// Warmup phase
@@ -163,6 +190,7 @@ impl<'a> Benchmark<'a> {
for _ in 0..self.options.warmup_count {
let _ = run_preparation_command()?;
let _ = self.executor.run_command_and_measure(self.command, None)?;
let _ = run_conclusion_command()?;
if let Some(bar) = progress_bar.as_ref() {
bar.inc(1)
}
@@ -191,10 +219,16 @@ impl<'a> Benchmark<'a> {
let (res, status) = self.executor.run_command_and_measure(self.command, None)?;
let success = status.success();
let conclusion_result = run_conclusion_command()?;
let conclusion_overhead =
conclusion_result.map_or(0.0, |res| res.time_real + self.executor.time_overhead());
// Determine number of benchmark runs
let runs_in_min_time = (self.options.min_benchmarking_time
/ (res.time_real + self.executor.time_overhead() + preparation_overhead))
as u64;
/ (res.time_real
+ self.executor.time_overhead()
+ preparation_overhead
+ conclusion_overhead)) as u64;
let count = {
let min = cmp::max(runs_in_min_time, self.options.run_bounds.min);
@@ -251,6 +285,8 @@ impl<'a> Benchmark<'a> {
if let Some(bar) = progress_bar.as_ref() {
bar.inc(1)
}
run_conclusion_command()?;
}
if let Some(bar) = progress_bar.as_ref() {
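To make the run-count change in this file concrete, here is a minimal standalone sketch of the estimate with made-up, floating-point-exact timings (3.0 s is hyperfine's default minimum benchmarking time, as the options diff below shows). It is illustrative only, not the actual implementation.

fn main() {
    let min_benchmarking_time = 3.0; // seconds; hyperfine's default minimum
    let time_real = 0.25;            // one timing run of the benchmarked command
    let shell_overhead = 0.0;        // executor overhead per spawned command
    let preparation_overhead = 0.0;  // no --prepare in this made-up scenario
    let conclusion_overhead = 0.25;  // time spent in the --conclude command

    // The --conclude time is now part of the per-run cost used to size the benchmark.
    let runs_in_min_time = (min_benchmarking_time
        / (time_real + shell_overhead + preparation_overhead + conclusion_overhead))
        as u64;
    assert_eq!(runs_in_min_time, 6); // 3.0 / (0.25 + 0.25)
}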

View File

@@ -102,6 +102,23 @@ fn build_command() -> Command {
be run prior to the corresponding benchmark command.",
),
)
.arg(
Arg::new("conclude")
.long("conclude")
.short('C')
.action(ArgAction::Append)
.num_args(1)
.value_name("CMD")
.value_hint(ValueHint::CommandString)
.help(
"Execute CMD after each timing run. This is useful for killing \
long-running processes started (e.g. a web server started in --prepare), \
for example.\nThe --conclude option can be specified once for all \
commands or multiple times, once for each command. In the latter case, \
each conclude command will be run after the corresponding benchmark \
command.",
),
)
.arg(
Arg::new("cleanup")
.long("cleanup")

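As a minimal standalone illustration of the new argument above (not hyperfine's actual CLI builder, using an invented program name and assuming clap 4's builder API, which this diff uses): with ArgAction::Append, each -C/--conclude occurrence contributes one CMD value, which is what later allows the "once or N times" matching.

use clap::{Arg, ArgAction, Command};

fn main() {
    let matches = Command::new("demo")
        .arg(
            Arg::new("conclude")
                .long("conclude")
                .short('C')
                .action(ArgAction::Append)
                .num_args(1)
                .value_name("CMD"),
        )
        .get_matches_from(["demo", "-C", "echo a", "--conclude", "echo b"]);

    // Repeated occurrences accumulate in the order they were given.
    let concludes: Vec<String> = matches
        .get_many::<String>("conclude")
        .map(|values| values.cloned().collect())
        .unwrap_or_default();
    assert_eq!(concludes, ["echo a", "echo b"]);
}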
View File

@@ -207,6 +207,9 @@ pub struct Options {
/// Command(s) to run before each timing run
pub preparation_command: Option<Vec<String>>,
/// Command(s) to run after each timing run
pub conclusion_command: Option<Vec<String>>,
/// Command to run before each *batch* of timing runs, i.e. before each individual benchmark
pub setup_command: Option<String>,
@@ -243,6 +246,7 @@ impl Default for Options {
min_benchmarking_time: 3.0,
command_failure_action: CmdFailureAction::RaiseError,
preparation_command: None,
conclusion_command: None,
setup_command: None,
cleanup_command: None,
output_style: OutputStyleOption::Full,
@@ -304,6 +308,10 @@ impl Options {
.get_many::<String>("prepare")
.map(|values| values.map(String::from).collect::<Vec<String>>());
options.conclusion_command = matches
.get_many::<String>("conclude")
.map(|values| values.map(String::from).collect::<Vec<String>>());
options.cleanup_command = matches.get_one::<String>("cleanup").map(String::from);
options.command_output_policy = if matches.get_flag("show-output") {
@@ -432,6 +440,15 @@ impl Options {
);
}
if let Some(conclusion_command) = &self.conclusion_command {
ensure!(
conclusion_command.len() <= 1
|| commands.num_commands() == conclusion_command.len(),
"The '--conclude' option has to be provided just once or N times, where N is the \
number of benchmark commands."
);
}
Ok(())
}
}
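A compact restatement of the validation rule above as a standalone predicate (illustrative only, not the actual implementation), matching the failure case exercised in the '--conclude' integration test further down:

fn conclude_count_is_valid(num_commands: usize, num_concludes: usize) -> bool {
    // --conclude must be given once (applies to all commands) or exactly once per command.
    num_concludes <= 1 || num_commands == num_concludes
}

fn main() {
    assert!(conclude_count_is_valid(2, 1));  // one --conclude shared by two commands
    assert!(conclude_count_is_valid(2, 2));  // one --conclude per command
    assert!(!conclude_count_is_valid(3, 2)); // rejected, as in the test below
}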

View File

@@ -54,6 +54,11 @@ impl ExecutionOrderTest {
self.command(output)
}
fn conclude(&mut self, output: &str) -> &mut Self {
self.arg("--conclude");
self.command(output)
}
fn cleanup(&mut self, output: &str) -> &mut Self {
self.arg("--cleanup");
self.command(output)
@@ -160,6 +165,24 @@ fn prepare_commands_are_executed_before_each_timing_run() {
.run();
}
#[test]
fn conclude_commands_are_executed_after_each_timing_run() {
ExecutionOrderTest::new()
.arg("--runs=2")
.conclude("conclude")
.command("command 1")
.command("command 2")
.expect_output("command 1")
.expect_output("conclude")
.expect_output("command 1")
.expect_output("conclude")
.expect_output("command 2")
.expect_output("conclude")
.expect_output("command 2")
.expect_output("conclude")
.run();
}
#[test]
fn prepare_commands_are_executed_before_each_warmup() {
ExecutionOrderTest::new()
@@ -187,6 +210,33 @@ fn prepare_commands_are_executed_before_each_warmup() {
.run();
}
#[test]
fn conclude_commands_are_executed_after_each_warmup() {
ExecutionOrderTest::new()
.arg("--warmup=2")
.arg("--runs=1")
.conclude("conclude")
.command("command 1")
.command("command 2")
// warmup 1
.expect_output("command 1")
.expect_output("conclude")
.expect_output("command 1")
.expect_output("conclude")
// benchmark 1
.expect_output("command 1")
.expect_output("conclude")
// warmup 2
.expect_output("command 2")
.expect_output("conclude")
.expect_output("command 2")
.expect_output("conclude")
// benchmark 2
.expect_output("command 2")
.expect_output("conclude")
.run();
}
#[test]
fn cleanup_commands_are_executed_once_after_each_benchmark() {
ExecutionOrderTest::new()
@@ -234,6 +284,44 @@ fn setup_prepare_cleanup_combined() {
.run();
}
#[test]
fn setup_prepare_conclude_cleanup_combined() {
ExecutionOrderTest::new()
.arg("--warmup=1")
.arg("--runs=2")
.setup("setup")
.prepare("prepare")
.command("command1")
.command("command2")
.conclude("conclude")
.cleanup("cleanup")
// 1
.expect_output("setup")
.expect_output("prepare")
.expect_output("command1")
.expect_output("conclude")
.expect_output("prepare")
.expect_output("command1")
.expect_output("conclude")
.expect_output("prepare")
.expect_output("command1")
.expect_output("conclude")
.expect_output("cleanup")
// 2
.expect_output("setup")
.expect_output("prepare")
.expect_output("command2")
.expect_output("conclude")
.expect_output("prepare")
.expect_output("command2")
.expect_output("conclude")
.expect_output("prepare")
.expect_output("command2")
.expect_output("conclude")
.expect_output("cleanup")
.run();
}
#[test]
fn single_parameter_value() {
ExecutionOrderTest::new()

View File

@@ -75,6 +75,31 @@ fn fails_with_wrong_number_of_prepare_options() {
));
}
#[test]
fn fails_with_wrong_number_of_conclude_options() {
hyperfine()
.arg("--runs=1")
.arg("--conclude=echo a")
.arg("--conclude=echo b")
.arg("echo a")
.arg("echo b")
.assert()
.success();
hyperfine()
.arg("--runs=1")
.arg("--conclude=echo a")
.arg("--conclude=echo b")
.arg("echo a")
.arg("echo b")
.arg("echo c")
.assert()
.failure()
.stderr(predicate::str::contains(
"The '--conclude' option has to be provided",
));
}
#[test]
fn fails_with_duplicate_parameter_names() {
hyperfine()
@@ -167,6 +192,18 @@ fn fails_for_unknown_prepare_command() {
));
}
#[test]
fn fails_for_unknown_conclude_command() {
hyperfine()
.arg("--conclude=some-nonexisting-program-b5d9574198b7e4b12a71fa4747c0a577")
.arg("echo test")
.assert()
.failure()
.stderr(predicate::str::contains(
"The conclusion command terminated with a non-zero exit code.",
));
}
#[cfg(unix)]
#[test]
fn can_run_failing_commands_with_ignore_failure_option() {
@@ -320,6 +357,48 @@ fn takes_preparation_command_into_account_for_computing_number_of_runs() {
.stdout(predicate::str::contains("30 runs"));
}
#[test]
fn takes_conclusion_command_into_account_for_computing_number_of_runs() {
hyperfine_debug()
.arg("--conclude=sleep 0.02")
.arg("sleep 0.01")
.assert()
.success()
.stdout(predicate::str::contains("100 runs"));
// Shell overhead needs to be added to both the conclude command and the actual command,
// leading to a total benchmark time of (cmd + shell + conclude + shell = 0.1 s)
hyperfine_debug()
.arg("--shell=sleep 0.01")
.arg("--conclude=sleep 0.03")
.arg("sleep 0.05")
.assert()
.success()
.stdout(predicate::str::contains("30 runs"));
}
#[test]
fn takes_both_preparation_and_conclusion_command_into_account_for_computing_number_of_runs() {
hyperfine_debug()
.arg("--prepare=sleep 0.01")
.arg("--conclude=sleep 0.01")
.arg("sleep 0.01")
.assert()
.success()
.stdout(predicate::str::contains("100 runs"));
// Shell overhead needs to be added to the prepare and conclude commands as well as the actual command,
// leading to a total benchmark time of (prepare + shell + cmd + shell + conclude + shell = 0.1 s)
hyperfine_debug()
.arg("--shell=sleep 0.01")
.arg("--prepare=sleep 0.01")
.arg("--conclude=sleep 0.01")
.arg("sleep 0.05")
.assert()
.success()
.stdout(predicate::str::contains("30 runs"));
}
#[test]
fn shows_benchmark_comparison_with_relative_times() {
hyperfine_debug()