#!/usr/bin/env python
"""This script performs Welch's t-test on a JSON export file with two
benchmark results to test whether or not the two distributions are
the same."""

import argparse
import json
import sys

from scipy import stats

# Significance level: the null hypothesis (equal means) is rejected when the
# p-value of the test falls below this threshold.
THRESHOLD = 0.05


def main(argv=None):
    """Run Welch's t-test on the two benchmarks stored in a JSON file.

    The file must hold a top-level ``"results"`` list with exactly two
    entries, each providing a ``"command"`` and a ``"times"`` value.  Both
    commands, the t statistic, the p-value and a one-line verdict are
    printed to standard output.

    Args:
        argv: Command-line arguments without the program name.  ``None``
            (the default) makes argparse read ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 1 and a message on standard error when the
            file does not contain exactly two benchmarks; argparse also
            raises it for invalid command-line usage.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("file", help="JSON file with two benchmark results")
    args = parser.parse_args(argv)

    # Binary mode lets json.load detect the encoding (UTF-8/16/32) itself
    # instead of depending on the locale's default text encoding.
    with open(args.file, "rb") as f:
        results = json.load(f)["results"]

    if len(results) != 2:
        # A string argument makes sys.exit write the message to stderr and
        # terminate with exit status 1.
        sys.exit("The input file has to contain exactly two benchmarks")

    first, second = results
    print("Command 1: {}".format(first["command"]))
    print("Command 2: {}\n".format(second["command"]))

    # equal_var=False selects Welch's variant, which does not assume that
    # both samples share the same variance.
    t, p = stats.ttest_ind(first["times"], second["times"], equal_var=False)
    print("t = {:.3}, p = {:.3}".format(t, p))
    print()

    if p < THRESHOLD:
        print(
            "There is a difference between the two benchmarks "
            "(p < {}).".format(THRESHOLD)
        )
    else:
        # NOTE: this branch only means the test found no significant
        # difference; it is not proof that the distributions are equal.
        print(
            "The two benchmarks are almost the same "
            "(p >= {}).".format(THRESHOLD)
        )


if __name__ == "__main__":
    main()