forked from alibaba/AliSQL
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathregression_check.py
More file actions
115 lines (101 loc) · 3.11 KB
/
regression_check.py
File metadata and controls
115 lines (101 loc) · 3.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import os
import sys
import duckdb
import numpy
import subprocess
from io import StringIO
import csv
import statistics
# Paths of the two CSV timing files to compare. Both are mandatory and are
# supplied on the command line as --old=<path> and --new=<path>.
old_file = None
new_file = None
# the threshold at which we consider something a regression, expressed as a
# fraction of the old timing (0.1 == 10% slower)
regression_threshold_percentage = 0.1
# minimal seconds diff for something to be a regression (for very fast benchmarks)
regression_threshold_seconds = 0.01

_old_flag = "--old="
_new_flag = "--new="
# Skip argv[0] (the script path); only real arguments can carry a flag.
for arg in sys.argv[1:]:
    if arg.startswith(_old_flag):
        # Strip only the leading flag so a path that happens to contain the
        # flag text elsewhere is left intact.
        old_file = arg[len(_old_flag):]
    elif arg.startswith(_new_flag):
        new_file = arg[len(_new_flag):]

if old_file is None or new_file is None:
    print("Usage: python scripts/regression_check.py --old=<old_file> --new=<new_file>")
    # sys.exit works even when the interactive `exit` helper from the site
    # module is not installed.
    sys.exit(1)
def _median_timings(connection, csv_path):
    """Return one (name, median time) row per benchmark found in *csv_path*.

    The CSV columns are aliased to (name, nrun, time) in the query; rows are
    grouped by name and returned in sorted order.

    Single quotes in the path are doubled so that the path stays a single,
    well-formed SQL string literal.
    """
    quoted_path = csv_path.replace("'", "''")
    return connection.execute(
        f"SELECT name, median(time) FROM read_csv_auto('{quoted_path}') t(name, nrun, time) GROUP BY ALL ORDER BY ALL"
    ).fetchall()


con = duckdb.connect()
old_timings_l = _median_timings(con, old_file)
new_timings_l = _median_timings(con, new_file)
# Index the query results by benchmark name: each fetched row holds the name
# in position 0 and its median timing in position 1.
old_timings = {row[0]: row[1] for row in old_timings_l}
new_timings = {row[0]: row[1] for row in new_timings_l}
# Names of the benchmarks whose new timing counts as a regression.
slow_keys = []
# Factor applied to the (padded) old timing to obtain the slowest acceptable
# new timing.
multiply_percentage = 1.0 + regression_threshold_percentage
# Every benchmark present in the new results, in sorted order.
test_keys = sorted(new_timings)
for key in test_keys:
    old_timing = old_timings.get(key)
    if old_timing is None:
        # No usable baseline (e.g. a benchmark that only exists in the new
        # results): there is nothing to compare against, so it cannot be
        # reported as a regression.
        continue
    new_timing = new_timings[key]
    # The absolute slack is added before scaling so that tiny timings do not
    # trip the relative threshold on noise alone.
    if (old_timing + regression_threshold_seconds) * multiply_percentage < new_timing:
        slow_keys.append(key)
# Process exit status: 0 when nothing regressed, 1 when at least one
# benchmark did.
return_code = 0
if slow_keys:
    print(
        '''====================================================
============== REGRESSIONS DETECTED =============
====================================================
'''
    )
    return_code = 1
    # Every key in slow_keys was compared against an old timing, so both
    # lookups below are guaranteed to succeed.
    for key in slow_keys:
        new_timing = new_timings[key]
        old_timing = old_timings[key]
        print(key)
        print(f"Old timing: {old_timing}")
        print(f"New timing: {new_timing}")
        print("")

    # On failure, dump both raw timing files so the individual runs can be
    # inspected, not just the medians.
    print(
        '''====================================================
================== New Timings ==================
====================================================
'''
    )
    with open(new_file, 'r') as f:
        print(f.read())
    print(
        '''====================================================
================== Old Timings ==================
====================================================
'''
    )
    with open(old_file, 'r') as f:
        print(f.read())
else:
    print(
        '''====================================================
============== NO REGRESSIONS DETECTED =============
====================================================
'''
    )

print(
    '''====================================================
=================== ALL TIMINGS ===================
====================================================
'''
)
for key in test_keys:
    new_timing = new_timings[key]
    # A benchmark that only exists in the new results has no old timing;
    # show a placeholder instead of failing with a KeyError.
    old_timing = old_timings.get(key, "N/A (no baseline)")
    print(key)
    print(f"Old timing: {old_timing}")
    print(f"New timing: {new_timing}")
    print("")

# sys.exit works even when the interactive `exit` helper from the site module
# is not installed.
sys.exit(return_code)