mirror of
https://github.com/Bodigrim/tasty-bench.git
synced 2024-11-27 12:43:34 +03:00
Initial commit
This commit is contained in:
commit
97cc2dc03a
180
.github/workflows/haskell-ci.yml
vendored
Normal file
180
.github/workflows/haskell-ci.yml
vendored
Normal file
@ -0,0 +1,180 @@
|
||||
# This GitHub workflow config has been generated by a script via
#
#   haskell-ci 'github' 'tasty-bench.cabal'
#
# To regenerate the script (for example after adjusting tested-with) run
#
#   haskell-ci regenerate
#
# For more information, see https://github.com/haskell-CI/haskell-ci
#
# version: 0.11.20210111
#
# REGENDATA ("0.11.20210111",["github","tasty-bench.cabal"])
#
name: Haskell-CI
on:
  - push
  - pull_request
jobs:
  linux:
    name: Haskell-CI Linux - GHC ${{ matrix.ghc }}
    runs-on: ubuntu-18.04
    container:
      image: buildpack-deps:bionic
    continue-on-error: ${{ matrix.allow-failure }}
    strategy:
      # One job per compiler listed in the package's tested-with field.
      matrix:
        include:
          - ghc: 8.10.3
            allow-failure: false
          - ghc: 8.8.4
            allow-failure: false
          - ghc: 8.6.5
            allow-failure: false
          - ghc: 8.4.4
            allow-failure: false
          - ghc: 8.2.2
            allow-failure: false
          - ghc: 8.0.2
            allow-failure: false
          - ghc: 7.10.3
            allow-failure: false
          - ghc: 7.8.4
            allow-failure: false
          - ghc: 7.6.3
            allow-failure: false
          - ghc: 7.4.2
            allow-failure: false
          - ghc: 7.2.2
            allow-failure: false
          - ghc: 7.0.4
            allow-failure: false
      fail-fast: false
    steps:
      - name: apt
        run: |
          apt-get update
          apt-get install -y --no-install-recommends gnupg ca-certificates dirmngr curl git software-properties-common
          apt-add-repository -y 'ppa:hvr/ghc'
          apt-get update
          apt-get install -y ghc-$GHC_VERSION cabal-install-3.2
        env:
          GHC_VERSION: ${{ matrix.ghc }}
      - name: Set PATH and environment variables
        run: |
          echo "$HOME/.cabal/bin" >> $GITHUB_PATH
          echo "LANG=C.UTF-8" >> $GITHUB_ENV
          echo "CABAL_DIR=$HOME/.cabal" >> $GITHUB_ENV
          echo "CABAL_CONFIG=$HOME/.cabal/config" >> $GITHUB_ENV
          HC=/opt/ghc/$GHC_VERSION/bin/ghc
          echo "HC=$HC" >> $GITHUB_ENV
          echo "HCPKG=/opt/ghc/$GHC_VERSION/bin/ghc-pkg" >> $GITHUB_ENV
          echo "HADDOCK=/opt/ghc/$GHC_VERSION/bin/haddock" >> $GITHUB_ENV
          echo "CABAL=/opt/cabal/3.2/bin/cabal -vnormal+nowrap" >> $GITHUB_ENV
          HCNUMVER=$(${HC} --numeric-version|perl -ne '/^(\d+)\.(\d+)\.(\d+)(\.(\d+))?$/; print(10000 * $1 + 100 * $2 + ($3 == 0 ? $5 != 1 : $3))')
          echo "HCNUMVER=$HCNUMVER" >> $GITHUB_ENV
          echo "ARG_TESTS=--enable-tests" >> $GITHUB_ENV
          echo "ARG_BENCH=--enable-benchmarks" >> $GITHUB_ENV
          echo "ARG_COMPILER=--ghc --with-compiler=/opt/ghc/$GHC_VERSION/bin/ghc" >> $GITHUB_ENV
          echo "GHCJSARITH=0" >> $GITHUB_ENV
        env:
          GHC_VERSION: ${{ matrix.ghc }}
      - name: env
        run: |
          env
      - name: write cabal config
        run: |
          mkdir -p $CABAL_DIR
          cat >> $CABAL_CONFIG <<EOF
          remote-build-reporting: anonymous
          write-ghc-environment-files: never
          remote-repo-cache: $CABAL_DIR/packages
          logs-dir: $CABAL_DIR/logs
          world-file: $CABAL_DIR/world
          extra-prog-path: $CABAL_DIR/bin
          symlink-bindir: $CABAL_DIR/bin
          installdir: $CABAL_DIR/bin
          build-summary: $CABAL_DIR/logs/build.log
          store-dir: $CABAL_DIR/store
          install-dirs user
            prefix: $CABAL_DIR
          repository hackage.haskell.org
            url: http://hackage.haskell.org/
          EOF
          cat $CABAL_CONFIG
      - name: versions
        run: |
          $HC --version || true
          $HC --print-project-git-commit-id || true
          $CABAL --version || true
      - name: update cabal index
        run: |
          $CABAL v2-update -v
      - name: install cabal-plan
        run: |
          mkdir -p $HOME/.cabal/bin
          curl -sL https://github.com/haskell-hvr/cabal-plan/releases/download/v0.6.2.0/cabal-plan-0.6.2.0-x86_64-linux.xz > cabal-plan.xz
          # sha256sum -c expects "<digest><two spaces><file name>".
          echo 'de73600b1836d3f55e32d80385acc055fd97f60eaa0ab68a755302685f5d81bc  cabal-plan.xz' | sha256sum -c -
          xz -d < cabal-plan.xz > $HOME/.cabal/bin/cabal-plan
          rm -f cabal-plan.xz
          chmod a+x $HOME/.cabal/bin/cabal-plan
          cabal-plan --version
      - name: checkout
        uses: actions/checkout@v2
        with:
          path: source
      - name: sdist
        run: |
          mkdir -p sdist
          cd source || false
          $CABAL sdist all --output-dir $GITHUB_WORKSPACE/sdist
      - name: unpack
        run: |
          mkdir -p unpacked
          find sdist -maxdepth 1 -type f -name '*.tar.gz' -exec tar -C $GITHUB_WORKSPACE/unpacked -xzvf {} \;
      - name: generate cabal.project
        run: |
          PKGDIR_tasty_bench="$(find "$GITHUB_WORKSPACE/unpacked" -maxdepth 1 -type d -regex '.*/tasty-bench-[0-9.]*')"
          echo "PKGDIR_tasty_bench=${PKGDIR_tasty_bench}" >> $GITHUB_ENV
          touch cabal.project
          touch cabal.project.local
          echo "packages: ${PKGDIR_tasty_bench}" >> cabal.project
          if [ $((HCNUMVER >= 80200)) -ne 0 ] ; then echo "package tasty-bench" >> cabal.project ; fi
          if [ $((HCNUMVER >= 80200)) -ne 0 ] ; then echo " ghc-options: -Werror=missing-methods" >> cabal.project ; fi
          cat >> cabal.project <<EOF
          EOF
          $HCPKG list --simple-output --names-only | perl -ne 'for (split /\s+/) { print "constraints: $_ installed\n" unless /^(tasty-bench)$/; }' >> cabal.project.local
          cat cabal.project
          cat cabal.project.local
      - name: dump install plan
        run: |
          $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dry-run all
          cabal-plan
      - name: cache
        uses: actions/cache@v2
        with:
          key: ${{ runner.os }}-${{ matrix.ghc }}-${{ github.sha }}
          path: ~/.cabal/store
          restore-keys: ${{ runner.os }}-${{ matrix.ghc }}-
      - name: install dependencies
        run: |
          $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --dependencies-only -j2 all
          $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dependencies-only -j2 all
      - name: build w/o tests
        run: |
          $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all
      - name: build
        run: |
          $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --write-ghc-environment-files=always
      - name: cabal check
        run: |
          cd ${PKGDIR_tasty_bench} || false
          ${CABAL} -vnormal check
      - name: haddock
        run: |
          $CABAL v2-haddock $ARG_COMPILER --with-haddock $HADDOCK $ARG_TESTS $ARG_BENCH all
      - name: unconstrained build
        run: |
          rm -f cabal.project.local
          $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all
|
21
LICENSE
Normal file
21
LICENSE
Normal file
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2021 Andrew Lelechenko
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
139
README.md
Normal file
139
README.md
Normal file
@ -0,0 +1,139 @@
|
||||
# tasty-bench
|
||||
|
||||
Featherlight benchmark framework (only one file!) for performance measurement with API mimicking [`criterion`](http://hackage.haskell.org/package/criterion) and [`gauge`](http://hackage.haskell.org/package/gauge).
|
||||
|
||||
## How lightweight is it?
|
||||
|
||||
There is only one source file `Test.Tasty.Bench`, less than 450 lines, and no external dependencies except [`tasty`](http://hackage.haskell.org/package/tasty). So if you already depend on `tasty` for a test suite, there
|
||||
is nothing else to install.
|
||||
|
||||
Compare this to `criterion` (10+ modules, 50+ dependencies) and `gauge` (40+ modules, depends on `basement` and `vector`).
|
||||
|
||||
## How is it possible?
|
||||
|
||||
Our benchmarks are literally regular `tasty` tests, so we can leverage all existing
|
||||
machinery for command-line options, resource management, structuring,
|
||||
listing and filtering benchmarks, running and reporting results. It also means
|
||||
that `tasty-bench` can be used in conjunction with other `tasty` ingredients.
|
||||
|
||||
Unlike `criterion` and `gauge` we use a very simple statistical model described below.
|
||||
This is arguably a questionable choice, but it works pretty well in practice.
|
||||
Few developers are sufficiently well-versed in probability theory
|
||||
to make sense and use of all numbers generated by `criterion`.
|
||||
|
||||
## How to switch?
|
||||
|
||||
[Cabal mixins](https://cabal.readthedocs.io/en/3.4/cabal-package.html#pkg-field-mixins)
|
||||
allow you to taste `tasty-bench` instead of `criterion` or `gauge`
|
||||
without changing a single line of code:
|
||||
|
||||
```cabal
|
||||
cabal-version: 2.0
|
||||
|
||||
benchmark foo
|
||||
...
|
||||
build-depends:
|
||||
tasty-bench
|
||||
mixins:
|
||||
tasty-bench (Test.Tasty.Bench as Criterion)
|
||||
```
|
||||
|
||||
This works vice versa as well: if you use `tasty-bench`, but at some point
|
||||
need a more comprehensive statistical analysis,
|
||||
it is easy to switch temporarily back to `criterion`.
|
||||
|
||||
## How to write a benchmark?
|
||||
|
||||
```haskell
|
||||
import Test.Tasty.Bench
|
||||
|
||||
fibo :: Int -> Integer
|
||||
fibo n = if n < 2 then toInteger n else fibo (n - 1) + fibo (n - 2)
|
||||
|
||||
main :: IO ()
|
||||
main = defaultMain
|
||||
[ bgroup "fibonacci numbers"
|
||||
[ bench "fifth" $ nf fibo 5
|
||||
, bench "tenth" $ nf fibo 10
|
||||
, bench "twentieth" $ nf fibo 20
|
||||
]
|
||||
]
|
||||
```
|
||||
|
||||
Since `tasty-bench` provides an API compatible with `criterion`,
|
||||
one can refer to [its documentation](http://www.serpentine.com/criterion/tutorial.html#how-to-write-a-benchmark-suite) for more examples.
|
||||
|
||||
## How to read results?
|
||||
|
||||
Running the example above results in the following output:
|
||||
|
||||
```
|
||||
All
|
||||
fibonacci numbers
|
||||
fifth: OK (2.13s)
|
||||
63 ns ± 3.4 ns
|
||||
tenth: OK (1.71s)
|
||||
809 ns ± 73 ns
|
||||
twentieth: OK (3.39s)
|
||||
104 μs ± 4.9 μs
|
||||
|
||||
All 3 tests passed (7.25s)
|
||||
```
|
||||
|
||||
The output says that, for instance, the first benchmark
|
||||
was repeatedly executed for 2.13 seconds (wall time),
|
||||
its mean time was 63 nanoseconds and with 95% probability
|
||||
execution time should not diverge from the mean
|
||||
further than ±3.4 nanoseconds (double standard deviation).
|
||||
|
||||
## Statistical model
|
||||
|
||||
Here is a procedure, used by `tasty-bench` to measure execution time:
|
||||
|
||||
1. Set _n_ ← 1.
|
||||
2. Measure execution time _tₙ_ of _n_ iterations
|
||||
and execution time _t₂ₙ_ of _2n_ iterations.
|
||||
3. Find _t_ which minimizes deviation of (_nt_, _2nt_) from (_tₙ_, _t₂ₙ_).
|
||||
4. If deviation is small enough, return _t_ as a mean execution time.
|
||||
5. Otherwise set _n_ ← _2n_ and jump back to Step 2.
|
||||
|
||||
This is roughly similar to the linear regression approach which `criterion` takes,
|
||||
but we fit only the last two points. This allows us to simplify away all heavy-weight
|
||||
statistical analysis. More importantly, earlier measurements,
|
||||
which are presumably shorter and noisier, do not affect overall result.
|
||||
This is in contrast to `criterion`, which fits all measurements and
|
||||
is biased to use more data points corresponding to shorter runs
|
||||
(it employs _n_ ← _1.05n_ progression).
|
||||
|
||||
## Command-line options
|
||||
|
||||
Use `--help` to list command-line options.
|
||||
|
||||
* `-p`, `--pattern`
|
||||
|
||||
This is a standard `tasty` option, which allows filtering benchmarks
|
||||
by a pattern or `awk` expression. Please refer to
|
||||
[`tasty` documentation](https://github.com/feuerbach/tasty#patterns)
|
||||
for details.
|
||||
|
||||
* `--plain`
|
||||
|
||||
Produce machine-readable output:
|
||||
`(mean in picoseconds, standard deviation in picoseconds)`.
|
||||
This is handy for consumption by other `tasty` ingredients.
|
||||
|
||||
* `-t`, `--timeout`
|
||||
|
||||
This is a standard `tasty` option, setting timeout for individual benchmarks
|
||||
in seconds. Use it when benchmarks tend to take too long: `tasty-bench` will make
|
||||
an effort to report results (even if of subpar quality) before timeout. Setting
|
||||
timeout too tight (insufficient for at least three iterations of benchmark)
|
||||
will result in a benchmark failure. Do not use `--timeout` without a reason:
|
||||
it forks an additional thread and thus affects reliability of measurements.
|
||||
|
||||
* `--stdev`
|
||||
|
||||
Target relative standard deviation of measurements in percent (5% by default).
|
||||
Large values correspond to fast and loose benchmarks, and small ones to long and precise.
|
||||
If it takes far too long, consider setting `--timeout`,
|
||||
which will interrupt benchmarks, potentially before reaching the target deviation.
|
447
Test/Tasty/Bench.hs
Normal file
447
Test/Tasty/Bench.hs
Normal file
@ -0,0 +1,447 @@
|
||||
{- |
|
||||
Module: Test.Tasty.Bench
|
||||
Copyright: (c) 2021 Andrew Lelechenko
|
||||
Licence: MIT
|
||||
|
||||
Featherlight benchmark framework (only one file!) for performance measurement with API mimicking [@criterion@](http://hackage.haskell.org/package/criterion) and [@gauge@](http://hackage.haskell.org/package/gauge).
|
||||
|
||||
=== How lightweight is it?
|
||||
|
||||
There is only one source file "Test.Tasty.Bench", less than 450 lines, and no external dependencies except [@tasty@](http://hackage.haskell.org/package/tasty). So if you already depend on @tasty@ for a test suite, there
|
||||
is nothing else to install.
|
||||
|
||||
Compare this to @criterion@ (10+ modules, 50+ dependencies) and @gauge@ (40+ modules, depends on @basement@ and @vector@).
|
||||
|
||||
=== How is it possible?
|
||||
|
||||
Our benchmarks are literally regular @tasty@ tests, so we can leverage all existing
|
||||
machinery for command-line options, resource management, structuring,
|
||||
listing and filtering benchmarks, running and reporting results. It also means
|
||||
that @tasty-bench@ can be used in conjunction with other @tasty@ ingredients.
|
||||
|
||||
Unlike @criterion@ and @gauge@ we use a very simple statistical model described below.
|
||||
This is arguably a questionable choice, but it works pretty well in practice.
|
||||
Few developers are sufficiently well-versed in probability theory
|
||||
to make sense and use of all numbers generated by @criterion@.
|
||||
|
||||
=== How to switch?
|
||||
|
||||
[Cabal mixins](https://cabal.readthedocs.io/en/3.4/cabal-package.html#pkg-field-mixins)
|
||||
allow you to taste @tasty-bench@ instead of @criterion@ or @gauge@
|
||||
without changing a single line of code:
|
||||
|
||||
@
|
||||
cabal-version: 2.0
|
||||
|
||||
benchmark foo
|
||||
...
|
||||
build-depends:
|
||||
tasty-bench
|
||||
mixins:
|
||||
tasty-bench (Test.Tasty.Bench as Criterion)
|
||||
@
|
||||
|
||||
This works vice versa as well: if you use @tasty-bench@, but at some point
|
||||
need a more comprehensive statistical analysis,
|
||||
it is easy to switch temporarily back to @criterion@.
|
||||
|
||||
=== How to write a benchmark?
|
||||
|
||||
@
|
||||
import Test.Tasty.Bench
|
||||
|
||||
fibo :: Int -> Integer
|
||||
fibo n = if n < 2 then toInteger n else fibo (n - 1) + fibo (n - 2)
|
||||
|
||||
main :: IO ()
|
||||
main = defaultMain
|
||||
[ bgroup "fibonacci numbers"
|
||||
[ bench "fifth" $ nf fibo 5
|
||||
, bench "tenth" $ nf fibo 10
|
||||
, bench "twentieth" $ nf fibo 20
|
||||
]
|
||||
]
|
||||
@
|
||||
|
||||
Since @tasty-bench@ provides an API compatible with @criterion@,
|
||||
one can refer to [its documentation](http://www.serpentine.com/criterion/tutorial.html#how-to-write-a-benchmark-suite) for more examples.
|
||||
|
||||
=== How to read results?
|
||||
|
||||
Running the example above results in the following output:
|
||||
|
||||
@
|
||||
All
|
||||
fibonacci numbers
|
||||
fifth: OK (2.13s)
|
||||
63 ns ± 3.4 ns
|
||||
tenth: OK (1.71s)
|
||||
809 ns ± 73 ns
|
||||
twentieth: OK (3.39s)
|
||||
104 μs ± 4.9 μs
|
||||
|
||||
All 3 tests passed (7.25s)
|
||||
@
|
||||
|
||||
The output says that, for instance, the first benchmark
|
||||
was repeatedly executed for 2.13 seconds (wall time),
|
||||
its mean time was 63 nanoseconds and with 95% probability
|
||||
execution time should not diverge from the mean
|
||||
further than ±3.4 nanoseconds (double standard deviation).
|
||||
|
||||
=== Statistical model
|
||||
|
||||
Here is a procedure, used by @tasty-bench@ to measure execution time:
|
||||
|
||||
1. Set \( n \leftarrow 1 \).
|
||||
2. Measure execution time \( t_n \) of \( n \) iterations
|
||||
and execution time \( t_{2n} \) of \( 2n \) iterations.
|
||||
3. Find \( t \) which minimizes deviation of \( (nt, 2nt) \) from \( (t_n, t_{2n}) \).
|
||||
4. If deviation is small enough, return \( t \) as a mean execution time.
|
||||
5. Otherwise set \( n \leftarrow 2n \) and jump back to Step 2.
|
||||
|
||||
This is roughly similar to the linear regression approach which @criterion@ takes,
|
||||
but we fit only the last two points. This allows us to simplify away all heavy-weight
|
||||
statistical analysis. More importantly, earlier measurements,
|
||||
which are presumably shorter and noisier, do not affect overall result.
|
||||
This is in contrast to @criterion@, which fits all measurements and
|
||||
is biased to use more data points corresponding to shorter runs
|
||||
(it employs \( n \leftarrow 1.05n \) progression).
|
||||
|
||||
=== Command-line options
|
||||
|
||||
Use @--help@ to list command-line options.
|
||||
|
||||
[@-p@, @--pattern@]:
|
||||
This is a standard @tasty@ option, which allows filtering benchmarks
|
||||
by a pattern or @awk@ expression. Please refer
|
||||
to [@tasty@ documentation](https://github.com/feuerbach/tasty#patterns)
|
||||
for details.
|
||||
|
||||
[@--plain@]:
|
||||
Produce machine-readable output:
|
||||
@(mean in picoseconds, standard deviation in picoseconds)@.
|
||||
This is handy for consumption by other @tasty@ ingredients.
|
||||
|
||||
[@-t@, @--timeout@]:
|
||||
This is a standard @tasty@ option, setting timeout for individual benchmarks
|
||||
in seconds. Use it when benchmarks tend to take too long: @tasty-bench@ will make
|
||||
an effort to report results (even if of subpar quality) before timeout. Setting
|
||||
timeout too tight (insufficient for at least three iterations of benchmark)
|
||||
will result in a benchmark failure. Do not use @--timeout@ without a reason:
|
||||
it forks an additional thread and thus affects reliability of measurements.
|
||||
|
||||
[@--stdev@]:
|
||||
Target relative standard deviation of measurements in percent (5% by default).
|
||||
Large values correspond to fast and loose benchmarks, and small ones to long and precise.
|
||||
If it takes far too long, consider setting @--timeout@,
|
||||
which will interrupt benchmarks, potentially before reaching the target deviation.
|
||||
|
||||
-}
|
||||
|
||||
module Test.Tasty.Bench
|
||||
(
|
||||
-- * Running 'Benchmark'
|
||||
defaultMain
|
||||
, Benchmark
|
||||
, bench
|
||||
, bgroup
|
||||
-- * Creating 'Benchmarkable'
|
||||
, Benchmarkable
|
||||
, nf
|
||||
, whnf
|
||||
, nfIO
|
||||
, whnfIO
|
||||
, nfAppIO
|
||||
, whnfAppIO
|
||||
) where
|
||||
|
||||
import Control.Applicative
|
||||
import Control.DeepSeq
|
||||
import Control.Exception
|
||||
import Data.Data (Typeable)
|
||||
import Data.Int
|
||||
import Data.Monoid
|
||||
import Data.Proxy
|
||||
import System.CPUTime
|
||||
import System.Mem
|
||||
import Test.Tasty hiding (defaultMain)
|
||||
import qualified Test.Tasty
|
||||
import Test.Tasty.Options
|
||||
import Test.Tasty.Providers
|
||||
import Text.Printf
|
||||
import Test.Tasty.Runners
|
||||
|
||||
-- | The @--plain@ option: when switched on, a benchmark result is
-- rendered as a @(mean, standard deviation)@ tuple instead of
-- a human-readable string.
newtype PlainFormat = PlainFormat { unPlainFormat :: Bool }
  deriving (Eq, Ord, Show, Typeable)

instance IsOption PlainFormat where
  optionName = pure "plain"
  optionHelp = pure "Produce machine-readable output: (mean in picoseconds, standard deviation in picoseconds). This is handy for consumption by other tasty ingredients."
  -- Human-readable output unless asked otherwise.
  defaultValue = PlainFormat False
  parseValue str = fmap PlainFormat (safeReadBool str)
  -- On the command line this is a bare flag: its presence means 'True'.
  optionCLParser = mkFlagCLParser mempty (PlainFormat True)
|
||||
|
||||
-- | The @--stdev@ option: target relative standard deviation of
-- measurements, expressed in percent.
newtype RelStDev = RelStDev { unRelStDev :: Double }
  deriving (Eq, Ord, Show, Typeable)

instance IsOption RelStDev where
  optionName = pure "stdev"
  optionHelp = pure "Target relative standard deviation of measurements in percents (5 by default). Large values correspond to fast and loose benchmarks, and small ones to long and precise. If it takes far too long, consider setting --timeout, which will interrupt benchmarks, potentially before reaching the target deviation."
  -- 5 percent, matching the help text above.
  defaultValue = RelStDev 5
  parseValue str = fmap RelStDev (safeRead str)
|
||||
|
||||
-- | A computation which can be benchmarked: an action parameterised
-- by the number of iterations to execute.
--
-- This type stands in for both 'Criterion.Benchmarkable'
-- and 'Gauge.Benchmarkable'.
--
newtype Benchmarkable = Benchmarkable { _unBenchmarkable :: Int64 -> IO () }
  deriving (Typeable)
|
||||
|
||||
-- | Render a duration given in picoseconds using the largest unit
-- (ps, ns, μs, ms, s) that keeps the number short.
-- Zero is rendered as a bare @"0"@ without a unit.
showPicos :: Integer -> String
showPicos picos
  | magnitude == 0 = "0"
  | otherwise = pick scales
  where
    value, magnitude :: Double
    value = fromInteger picos
    -- Thresholds are compared against the absolute value,
    -- while the signed value is what gets printed.
    magnitude = abs value

    -- (exclusive upper bound in ps, printf format, picoseconds per unit)
    scales :: [(Double, String, Double)]
    scales =
      [ (995,   "%3.0f ps", 1)
      , (995e1, "%3.1f ns", 1e3)
      , (995e3, "%3.0f ns", 1e3)
      , (995e4, "%3.1f μs", 1e6)
      , (995e6, "%3.0f μs", 1e6)
      , (995e7, "%3.1f ms", 1e9)
      , (995e9, "%3.0f ms", 1e9)
      ]

    pick :: [(Double, String, Double)] -> String
    pick ((limit, fmt, unit) : rest)
      | magnitude < limit = printf fmt (value / unit)
      | otherwise = pick rest
    -- Anything beyond the table is reported in seconds.
    pick [] = printf "%.1f s" (value / 1e12)
|
||||
|
||||
-- | Result of a benchmark: an estimate of the execution time
-- together with its standard deviation.
data Measurement = Measurement
  { measMean  :: !Integer -- ^ time in picoseconds
  , measSigma :: !Double  -- ^ stdev in picoseconds
  } deriving (Eq, Ord)

instance Show Measurement where
  show meas = concat [showPicos (measMean meas), " ± ", showPicos (truncate twoSigma)]
    where
      -- The reported margin is two sigmas, which correspond to 95% probability.
      twoSigma = 2 * measSigma meas
|
||||
|
||||
-- | Fit a single time @a@ to two measurements so that @(a, 2a)@ is as
-- close as possible (in the least-squares sense) to @(t1, t2)@.
-- The square root of the remaining squared deviation is reported as sigma.
predict
  :: Integer -- ^ time for one run
  -> Integer -- ^ time for two runs
  -> Measurement
predict t1 t2 = Measurement fitted (sqrt (fromInteger residual))
  where
    -- Minimiser of (t1 - a)^2 + (t2 - 2a)^2, rounded towards zero.
    fitted = (t1 + 2 * t2) `quot` 5
    errOne = t1 - fitted
    errTwo = t2 - 2 * fitted
    residual = errOne * errOne + errTwo * errTwo
|
||||
|
||||
-- | Like 'predict', but pessimistic about timer resolution: the mean
-- comes from the measurements as given, while sigma is the worse of
-- the two fits obtained by shifting the inputs apart and together
-- by the clock precision.
predictPerturbed :: Integer -> Integer -> Measurement
predictPerturbed t1 t2 = Measurement centre spread
  where
    centre = measMean (predict t1 t2)
    spread = measSigma shiftedApart `max` measSigma shiftedTogether
    shiftedApart = predict (t1 - prec) (t2 + prec)
    shiftedTogether = predict (t1 + prec) (t2 - prec)
    -- Never trust the clock beyond 1 ms, whatever precision it reports.
    prec = max cpuTimePrecision 1000000000 -- 1 ms
|
||||
|
||||
-- | Run the benchmark for the given number of iterations and return
-- the CPU time it consumed, as reported by 'getCPUTime'.
-- A garbage collection is performed before the clock is started.
measureTime :: Int64 -> Benchmarkable -> IO Integer
measureTime iters (Benchmarkable runIters) = do
  performGC
  before <- getCPUTime
  runIters iters
  after <- getCPUTime
  pure (after - before)
|
||||
|
||||
-- | Keep doubling the number of iterations until the relative standard
-- deviation of the estimate drops below the target, or until the
-- timeout (in the same units as 'measureTime' results, if given)
-- is about to expire. A zero mean is never accepted as a result.
measureTimeUntil :: Maybe Integer -> Double -> Benchmarkable -> IO Measurement
measureTimeUntil timeout targetRelStDev b =
  measureTime 1 b >>= \firstRun -> loop 1 firstRun 0
  where
    -- loop n (time of n iterations) (total time of all earlier runs)
    loop :: Int64 -> Integer -> Integer -> IO Measurement
    loop iters tN elapsed = do
      t2N <- measureTime (2 * iters) b

      let Measurement batchMean batchSigma = predictPerturbed tN t2N
          -- Scale the estimate for a batch down to a single iteration.
          perIterMean = batchMean `quot` toInteger iters
          perIterSigma = batchSigma / fromIntegral iters
          preciseEnough = perIterSigma / fromInteger perIterMean < targetRelStDev
          outOfTime = case timeout of
            -- The next round is expected to take about 2 * t2N;
            -- multiplying by 1.2 helps to avoid accidental timeouts
            Just limit -> (elapsed + tN + t2N + (2 * t2N)) * 12 >= limit * 10
            Nothing -> False

      if perIterMean > 0 && (preciseEnough || outOfTime)
        then pure $ Measurement perIterMean perIterSigma
        else loop (2 * iters) t2N (elapsed + tN)
|
||||
|
||||
-- | Benchmarks are run as ordinary @tasty@ tests: a benchmark passes
-- with its measurement as the description, and fails outright
-- when more than one thread is requested.
instance IsTest Benchmarkable where
  testOptions = pure [Option (Proxy :: Proxy RelStDev), Option (Proxy :: Proxy PlainFormat)]

  -- The progress callback is not used.
  run opts b _progress
    | getNumThreads (lookupOption opts) /= 1 =
        pure $ testFailed "Benchmarks should be run in a single-threaded mode (--jobs 1)"
    | otherwise = do
        meas <- measureTimeUntil timeout targetRelStDev b
        pure $ testPassed $
          if unPlainFormat (lookupOption opts)
            then show (measMean meas, measSigma meas)
            else show meas
    where
      -- The option is given in percent; convert to a fraction.
      targetRelStDev = unRelStDev (lookupOption opts) / 100
      -- Scale the timeout to the picosecond unit used by the measurements.
      timeout = case lookupOption opts of
        NoTimeout -> Nothing
        Timeout micros _ -> Just $ micros * 1000000
|
||||
|
||||
-- | Give a 'Benchmarkable' a name, turning it into a 'Benchmark'.
--
-- This simply delegates to 'Test.Tasty.Providers.singleTest'
-- and exists to offer the same interface as 'Criterion.bench'
-- and 'Gauge.bench'.
--
bench :: String -> Benchmarkable -> Benchmark
bench name benchmarkable = singleTest name benchmarkable
|
||||
|
||||
-- | Combine several 'Benchmark's into a named group.
--
-- This simply delegates to 'Test.Tasty.testGroup'
-- and exists to offer the same interface as 'Criterion.bgroup'
-- and 'Gauge.bgroup'.
--
bgroup :: String -> [Benchmark] -> Benchmark
bgroup name benchmarks = testGroup name benchmarks
|
||||
|
||||
-- | A benchmark is nothing more than an ordinary 'Test.Tasty.TestTree'
-- under another name.
--
-- The synonym mirrors 'Criterion.Benchmark' and 'Gauge.Benchmark',
-- so that it can be used as a drop-in replacement for either.
--
type Benchmark = TestTree
|
||||
|
||||
-- | Execute the given benchmarks and report their results.
--
-- All benchmarks are put into a single group named @All@ and handed
-- over to 'Test.Tasty.defaultMain'. The function exists to offer
-- the same interface as 'Criterion.defaultMain' and 'Gauge.defaultMain'.
--
defaultMain :: [Benchmark] -> IO ()
defaultMain benchmarks = Test.Tasty.defaultMain (testGroup "All" benchmarks)
|
||||
|
||||
-- | Build a 'Benchmarkable' which, on every iteration, applies the
-- function to the argument and evaluates the result after passing it
-- through the supplied forcing function.
--
-- The definition deliberately keeps a single argument on the left-hand
-- side, so that the 'INLINE' pragma behaves as it did before.
funcToBench :: (b -> c) -> (a -> b) -> a -> Benchmarkable
funcToBench frc = (Benchmarkable .) . loop
  where
    loop f x remaining
      | remaining <= 0 = pure ()
      | otherwise =
          evaluate (frc (f x)) >> loop f x (remaining - 1)
{-# INLINE funcToBench #-}
|
||||
|
||||
-- | 'nf' @f@ @x@ measures the time needed to evaluate @f@ @x@
-- to a normal form (by means of 'rnf').
--
-- Bear in mind that forcing a normal form involves an additional
-- traversal of the structure. In certain scenarios (imagine
-- benchmarking 'tail'), especially when the 'NFData' instance is badly
-- written, this traversal may take non-negligible time and affect results.
--
-- Drop-in replacement for 'Criterion.nf' and 'Gauge.nf'.
--
nf :: NFData b => (a -> b) -> a -> Benchmarkable
nf = funcToBench rnf
{-# INLINE nf #-}
|
||||
|
||||
-- | 'whnf' @f@ @x@ measures the time needed to evaluate @f@ @x@
-- to a weak head normal form.
--
-- Evaluation to a weak head normal form only is rarely
-- what is intuitively meant by "evaluation".
-- Unless you understand precisely what is measured,
-- it is recommended to use 'nf' instead.
--
-- Drop-in replacement for 'Criterion.whnf' and 'Gauge.whnf'.
--
whnf :: (a -> b) -> a -> Benchmarkable
whnf = funcToBench id
{-# INLINE whnf #-}
|
||||
|
||||
-- | Build a 'Benchmarkable' which, on every iteration, executes the
-- action and evaluates its result after passing it through
-- the supplied forcing function.
ioToBench :: (b -> c) -> IO b -> Benchmarkable
ioToBench frc act = Benchmarkable loop
  where
    loop remaining
      | remaining <= 0 = pure ()
      | otherwise = do
          _ <- evaluate . frc =<< act
          loop (remaining - 1)
{-# INLINE ioToBench #-}
|
||||
|
||||
-- | 'nfIO' @x@ measures the time needed to perform the side effects
-- of @x@ and to evaluate its result to a normal form (by means of 'rnf').
--
-- A pure subexpression of an effectful computation @x@
-- may be evaluated only once and get cached; use 'nfAppIO'
-- to avoid this.
--
-- Bear in mind that forcing a normal form involves an additional
-- traversal of the structure. In certain scenarios,
-- especially when the 'NFData' instance is badly written,
-- this traversal may take non-negligible time and affect results.
--
-- Drop-in replacement for 'Criterion.nfIO' and 'Gauge.nfIO'.
--
nfIO :: NFData a => IO a -> Benchmarkable
nfIO = ioToBench rnf
{-# INLINE nfIO #-}
|
||||
|
||||
-- | 'whnfIO' @x@ measures time to evaluate side-effects of @x@
-- and compute its weak head normal form.
--
-- Pure subexpression of an effectful computation @x@
-- may be evaluated only once and get cached; use 'whnfAppIO'
-- to avoid this.
--
-- Computing only a weak head normal form is
-- rarely what intuitively is meant by "evaluation".
-- Unless you understand precisely what is measured,
-- it is recommended to use 'nfIO' instead.
--
-- Drop-in replacement for 'Criterion.whnfIO' and 'Gauge.whnfIO'.
--
-- No 'NFData' constraint is required: the result is only forced
-- to weak head normal form (the forcing function is 'id'), exactly as
-- in 'whnf' and 'whnfAppIO'.
--
whnfIO :: IO a -> Benchmarkable
whnfIO = ioToBench id
{-# INLINE whnfIO #-}
|
||||
|
||||
-- | Build a 'Benchmarkable' which, on every iteration, applies the
-- effectful function to the argument, executes the resulting action
-- and evaluates its result after passing it through the supplied
-- forcing function.
--
-- The definition deliberately keeps a single argument on the left-hand
-- side, so that the 'INLINE' pragma behaves as it did before.
ioFuncToBench :: (b -> c) -> (a -> IO b) -> a -> Benchmarkable
ioFuncToBench frc = (Benchmarkable .) . loop
  where
    loop f x remaining
      | remaining <= 0 = pure ()
      | otherwise = do
          _ <- evaluate . frc =<< f x
          loop f x (remaining - 1)
{-# INLINE ioFuncToBench #-}
|
||||
|
||||
-- | 'nfAppIO' @f@ @x@ measures the time needed to perform the side
-- effects of @f@ @x@ and to evaluate its result to a normal form
-- (by means of 'rnf').
--
-- Bear in mind that forcing a normal form involves an additional
-- traversal of the structure. In certain scenarios,
-- especially when the 'NFData' instance is badly written,
-- this traversal may take non-negligible time and affect results.
--
-- Drop-in replacement for 'Criterion.nfAppIO' and 'Gauge.nfAppIO'.
--
nfAppIO :: NFData b => (a -> IO b) -> a -> Benchmarkable
nfAppIO = ioFuncToBench rnf
{-# INLINE nfAppIO #-}
|
||||
|
||||
-- | 'whnfAppIO' @f@ @x@ measures the time needed to perform the side
-- effects of @f@ @x@ and to evaluate its result to a weak head normal form.
--
-- Evaluation to a weak head normal form only is rarely
-- what is intuitively meant by "evaluation".
-- Unless you understand precisely what is measured,
-- it is recommended to use 'nfAppIO' instead.
--
-- Drop-in replacement for 'Criterion.whnfAppIO' and 'Gauge.whnfAppIO'.
--
whnfAppIO :: (a -> IO b) -> a -> Benchmarkable
whnfAppIO = ioFuncToBench id
{-# INLINE whnfAppIO #-}
|
3
changelog.md
Normal file
3
changelog.md
Normal file
@ -0,0 +1,3 @@
|
||||
# 0.1
|
||||
|
||||
* Initial release.
|
42
tasty-bench.cabal
Normal file
42
tasty-bench.cabal
Normal file
@ -0,0 +1,42 @@
|
||||
name:          tasty-bench
version:       0.1
cabal-version: >=1.10
build-type:    Simple
license:       MIT
license-file:  LICENSE
copyright:     2021 Andrew Lelechenko
maintainer:    Andrew Lelechenko <andrew.lelechenko@gmail.com>
homepage:      https://github.com/Bodigrim/tasty-bench
bug-reports:   https://github.com/Bodigrim/tasty-bench/issues
category:      Development, Performance, Testing, Benchmarking
synopsis:      Featherlight benchmark framework
description:
  Featherlight framework (only one file!)
  for performance measurement with API mimicking
  @criterion@ and @gauge@. Our benchmarks are just
  regular @tasty@ tests.

extra-source-files:
  changelog.md
  README.md

-- The CI workflow is generated from this list by haskell-ci;
-- regenerate the workflow after changing it.
tested-with: GHC==8.10.3, GHC==8.8.4, GHC==8.6.5, GHC==8.4.4, GHC==8.2.2, GHC==8.0.2, GHC==7.10.3, GHC==7.8.4, GHC==7.6.3, GHC==7.4.2, GHC==7.2.2, GHC==7.0.4

source-repository head
  type: git
  location: https://github.com/Bodigrim/tasty-bench

library
  exposed-modules:    Test.Tasty.Bench
  hs-source-dirs:     .
  default-language:   Haskell2010
  default-extensions: DeriveDataTypeable
  ghc-options:        -Wall -fno-warn-unused-imports

  build-depends:
    base >= 4.3 && < 5,
    deepseq >= 1.1,
    tasty >= 1.0.1
  -- Presumably needed for Data.Proxy, which the module imports
  -- and which older GHCs do not ship in base.
  if impl(ghc < 7.8)
    build-depends:
      tagged >= 0.2
|
Loading…
Reference in New Issue
Block a user