hyperfine/scripts/welch_ttest.py

34 lines
831 B
Python
Raw Normal View History

2019-10-07 08:57:42 +03:00
#!/usr/bin/python
"""Compare two benchmark results with Welch's t-test.

The script reads a JSON file whose top-level "results" array holds exactly
two entries, each with a "command" string and a "times" list of samples,
runs an unequal-variance (Welch) two-sample t-test on the two "times"
lists and prints the test statistic, the p-value and a verdict.
"""
import argparse
import io
import json
import sys

from scipy import stats

# p-values below this threshold are reported as a difference.
SIGNIFICANCE_LEVEL = 0.05

# The test needs a variance estimate for each sample, hence two values each.
MIN_SAMPLES = 2


def load_results(path):
    """Return the "results" list stored in the JSON file at *path*."""
    # JSON is UTF-8 by specification, so decode explicitly instead of relying
    # on the platform's locale encoding.  io.open accepts `encoding` on both
    # Python 2 and Python 3.
    with io.open(path, encoding="utf-8") as f:
        return json.load(f)["results"]


def welch_ttest(first_times, second_times):
    """Return ``(t, p)`` of Welch's t-test for the two lists of samples."""
    # equal_var=False selects Welch's variant, which does not assume that
    # both samples share the same variance.
    return stats.ttest_ind(first_times, second_times, equal_var=False)


def main(argv=None):
    """Run the comparison and return the process exit status.

    argv: command-line arguments without the program name; ``None`` makes
        argparse fall back to ``sys.argv[1:]``.

    Returns 0 after printing a verdict, or 1 when the input does not hold
    exactly two benchmarks or a benchmark has fewer than two samples.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("file", help="JSON file with two benchmark results")
    args = parser.parse_args(argv)

    results = load_results(args.file)

    if len(results) != 2:
        print("The input file has to contain exactly two benchmarks")
        return 1

    first, second = results
    a, b = first["command"], second["command"]
    X, Y = first["times"], second["times"]

    # With fewer than two samples the statistic is NaN; `NaN < threshold` is
    # False, which would otherwise be reported as "almost the same".
    if len(X) < MIN_SAMPLES or len(Y) < MIN_SAMPLES:
        print("Each benchmark needs at least two timing samples")
        return 1

    print("Command 1: {}".format(a))
    print("Command 2: {}\n".format(b))

    t, p = welch_ttest(X, Y)
    dispose = p < SIGNIFICANCE_LEVEL

    print("t = {}, p = {}".format(t, p))
    print("p < {} = {}\n".format(SIGNIFICANCE_LEVEL, dispose))

    if dispose:
        print("There is a difference between the two benchmarks.")
    else:
        print("The two benchmarks are almost the same.")
    return 0


if __name__ == "__main__":
    sys.exit(main())