Skip to content

Commit c46b3a0

Browse files
Optimize CI: consolidate workflows, fix caching, speed up e2e tests (47min → 15min) (#772)
* Optimize CI: consolidate workflows, fix caching, speed up e2e tests Workflow consolidation: - Delete integration.yml and daily-telemetry-e2e.yml (redundant with coverage workflow which already runs all e2e tests) - Add push-to-main trigger to coverage workflow - Run all tests (including telemetry) in single pytest invocation with --dist=loadgroup to respect xdist_group markers for isolation Fix pyarrow cache: - Remove cache-path: .venv-pyarrow from pyarrow jobs. Poetry always creates .venv regardless of the cache-path input, so the cache was never saved ("Path does not exist" error). The cache-suffix already differentiates keys between variants. Fix 3.14 post-test DNS hang: - Add enable_telemetry=False to unit test DUMMY_CONNECTION_ARGS that use server_hostname="foo". This prevents FeatureFlagsContext from making real HTTP calls to fake hosts, eliminating ~8min hang from ThreadPoolExecutor threads timing out on DNS on protected runners. Improve e2e test parallelization: - Split TestPySQLLargeQueriesSuite into 3 separate classes (TestPySQLLargeWideResultSet, TestPySQLLargeNarrowResultSet, TestPySQLLongRunningQuery) so xdist distributes them across workers instead of all landing on one. Speed up slow tests: - Reduce large result set sizes from 300MB to 100MB (still validates large fetches, lz4, chunking, row integrity) - Start test_long_running_query at scale_factor=50 instead of 1 to skip ramp-up iterations that finish instantly Co-authored-by: Isaac Signed-off-by: Vikrant Puppala <[email protected]> * Further optimize e2e: 4 workers, lower long-query threshold, split lz4 - Use -n 4 instead of -n auto in coverage workflow. The e2e tests are network-bound (waiting on warehouse), not CPU-bound, so 4 workers on a 2-CPU runner is fine and doubles parallelism. - Lower test_long_running_query min_duration from 3 min to 1 min. The test validates long-running query completion — 1 minute is sufficient and saves ~4 min per variant. 
- Split lz4 on/off loop in test_query_with_large_wide_result_set into separate parametrized test cases so xdist can run them on different workers instead of sequentially in one test. Co-authored-by: Isaac Signed-off-by: Vikrant Puppala <[email protected]> * Address review: inline test methods, drop mixin pattern Per review feedback from jprakash-db: - Remove mixin classes (LargeWideResultSetMixin, etc) — inline the test methods directly into the test classes in test_driver.py - Remove backward-compat LargeQueriesMixin alias (nothing uses it) - Rename _LargeQueryRowHelper — replaced entirely by inlining - Convert large_queries_mixin.py to just a fetch_rows() helper function Co-authored-by: Isaac Signed-off-by: Vikrant Puppala <[email protected]> --------- Signed-off-by: Vikrant Puppala <[email protected]>
1 parent: 32c446b · commit: c46b3a0

File tree

8 files changed

+123
-283
lines changed

8 files changed

+123
-283
lines changed

.github/workflows/code-coverage.yml

Lines changed: 10 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,15 @@
1-
name: Code Coverage
1+
name: E2E Tests and Code Coverage
22

33
permissions:
44
contents: read
55
id-token: write
66

7-
on: [pull_request, workflow_dispatch]
7+
on:
8+
push:
9+
branches:
10+
- main
11+
pull_request:
12+
workflow_dispatch:
813

914
jobs:
1015
test-with-coverage:
@@ -32,25 +37,16 @@ jobs:
3237
with:
3338
python-version: "3.10"
3439
install-args: "--all-extras"
35-
- name: Run parallel tests with coverage
40+
- name: Run all tests with coverage
3641
continue-on-error: false
3742
run: |
3843
poetry run pytest tests/unit tests/e2e \
39-
-m "not serial" \
40-
-n auto \
44+
-n 4 \
45+
--dist=loadgroup \
4146
--cov=src \
4247
--cov-report=xml \
4348
--cov-report=term \
4449
-v
45-
- name: Run telemetry tests with coverage (isolated)
46-
continue-on-error: false
47-
run: |
48-
poetry run pytest tests/e2e/test_concurrent_telemetry.py \
49-
--cov=src \
50-
--cov-append \
51-
--cov-report=xml \
52-
--cov-report=term \
53-
-v
5450
- name: Check for coverage override
5551
id: override
5652
env:

.github/workflows/code-quality-checks.yml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,6 @@ jobs:
7878
with:
7979
python-version: ${{ matrix.python-version }}
8080
install-args: "--all-extras"
81-
cache-path: ".venv-pyarrow"
8281
cache-suffix: "pyarrow-${{ matrix.dependency-version }}-"
8382
- name: Install Python tools for custom versions
8483
if: matrix.dependency-version != 'default'

.github/workflows/daily-telemetry-e2e.yml

Lines changed: 0 additions & 58 deletions
This file was deleted.

.github/workflows/integration.yml

Lines changed: 0 additions & 71 deletions
This file was deleted.

tests/e2e/common/large_queries_mixin.py

Lines changed: 31 additions & 127 deletions
Original file line number | Diff line number | Diff line change
@@ -2,139 +2,43 @@
22
import math
33
import time
44

5-
import pytest
6-
75
log = logging.getLogger(__name__)
86

97

10-
class LargeQueriesMixin:
8+
def fetch_rows(test_case, cursor, row_count, fetchmany_size):
119
"""
12-
This mixin expects to be mixed with a CursorTest-like class
10+
A generator for rows. Fetches until the end or up to 5 minutes.
1311
"""
14-
15-
def fetch_rows(self, cursor, row_count, fetchmany_size):
16-
"""
17-
A generator for rows. Fetches until the end or up to 5 minutes.
18-
"""
19-
# TODO: Remove fetchmany_size when we have fixed the performance issues with fetchone
20-
# in the Python client
21-
max_fetch_time = 5 * 60 # Fetch for at most 5 minutes
22-
23-
rows = self.get_some_rows(cursor, fetchmany_size)
24-
start_time = time.time()
25-
n = 0
26-
while rows:
27-
for row in rows:
28-
n += 1
29-
yield row
30-
if time.time() - start_time >= max_fetch_time:
31-
log.warning("Fetching rows timed out")
32-
break
33-
rows = self.get_some_rows(cursor, fetchmany_size)
34-
if not rows:
35-
# Read all the rows, row_count should match
36-
self.assertEqual(n, row_count)
37-
38-
num_fetches = max(math.ceil(n / 10000), 1)
39-
latency_ms = int((time.time() - start_time) * 1000 / num_fetches), 1
40-
print(
41-
"Fetched {} rows with an avg latency of {} per fetch, ".format(
42-
n, latency_ms
43-
)
44-
+ "assuming 10K fetch size."
12+
max_fetch_time = 5 * 60 # Fetch for at most 5 minutes
13+
14+
rows = _get_some_rows(cursor, fetchmany_size)
15+
start_time = time.time()
16+
n = 0
17+
while rows:
18+
for row in rows:
19+
n += 1
20+
yield row
21+
if time.time() - start_time >= max_fetch_time:
22+
log.warning("Fetching rows timed out")
23+
break
24+
rows = _get_some_rows(cursor, fetchmany_size)
25+
if not rows:
26+
# Read all the rows, row_count should match
27+
test_case.assertEqual(n, row_count)
28+
29+
num_fetches = max(math.ceil(n / 10000), 1)
30+
latency_ms = int((time.time() - start_time) * 1000 / num_fetches), 1
31+
print(
32+
"Fetched {} rows with an avg latency of {} per fetch, ".format(
33+
n, latency_ms
4534
)
46-
47-
@pytest.mark.parametrize(
48-
"extra_params",
49-
[
50-
{},
51-
{"use_sea": True},
52-
],
35+
+ "assuming 10K fetch size."
5336
)
54-
def test_query_with_large_wide_result_set(self, extra_params):
55-
resultSize = 300 * 1000 * 1000 # 300 MB
56-
width = 8192 # B
57-
rows = resultSize // width
58-
cols = width // 36
59-
60-
# Set the fetchmany_size to get 10MB of data a go
61-
fetchmany_size = 10 * 1024 * 1024 // width
62-
# This is used by PyHive tests to determine the buffer size
63-
self.arraysize = 1000
64-
with self.cursor(extra_params) as cursor:
65-
for lz4_compression in [False, True]:
66-
cursor.connection.lz4_compression = lz4_compression
67-
uuids = ", ".join(["uuid() uuid{}".format(i) for i in range(cols)])
68-
cursor.execute(
69-
"SELECT id, {uuids} FROM RANGE({rows})".format(
70-
uuids=uuids, rows=rows
71-
)
72-
)
73-
assert lz4_compression == cursor.active_result_set.lz4_compressed
74-
for row_id, row in enumerate(
75-
self.fetch_rows(cursor, rows, fetchmany_size)
76-
):
77-
assert row[0] == row_id # Verify no rows are dropped in the middle.
78-
assert len(row[1]) == 36
79-
80-
@pytest.mark.parametrize(
81-
"extra_params",
82-
[
83-
{},
84-
{"use_sea": True},
85-
],
86-
)
87-
def test_query_with_large_narrow_result_set(self, extra_params):
88-
resultSize = 300 * 1000 * 1000 # 300 MB
89-
width = 8 # sizeof(long)
90-
rows = resultSize / width
91-
92-
# Set the fetchmany_size to get 10MB of data a go
93-
fetchmany_size = 10 * 1024 * 1024 // width
94-
# This is used by PyHive tests to determine the buffer size
95-
self.arraysize = 10000000
96-
with self.cursor(extra_params) as cursor:
97-
cursor.execute("SELECT * FROM RANGE({rows})".format(rows=rows))
98-
for row_id, row in enumerate(self.fetch_rows(cursor, rows, fetchmany_size)):
99-
assert row[0] == row_id
100-
101-
@pytest.mark.parametrize(
102-
"extra_params",
103-
[
104-
{},
105-
{"use_sea": True},
106-
],
107-
)
108-
def test_long_running_query(self, extra_params):
109-
"""Incrementally increase query size until it takes at least 3 minutes,
110-
and asserts that the query completes successfully.
111-
"""
112-
minutes = 60
113-
min_duration = 3 * minutes
114-
115-
duration = -1
116-
scale0 = 10000
117-
scale_factor = 1
118-
with self.cursor(extra_params) as cursor:
119-
while duration < min_duration:
120-
assert scale_factor < 4096, "Detected infinite loop"
121-
start = time.time()
122-
123-
cursor.execute(
124-
"""SELECT count(*)
125-
FROM RANGE({scale}) x
126-
JOIN RANGE({scale0}) y
127-
ON from_unixtime(x.id * y.id, "yyyy-MM-dd") LIKE "%not%a%date%"
128-
""".format(
129-
scale=scale_factor * scale0, scale0=scale0
130-
)
131-
)
13237

133-
(n,) = cursor.fetchone()
134-
assert n == 0
13538

136-
duration = time.time() - start
137-
current_fraction = duration / min_duration
138-
print("Took {} s with scale factor={}".format(duration, scale_factor))
139-
# Extrapolate linearly to reach 3 min and add 50% padding to push over the limit
140-
scale_factor = math.ceil(1.5 * scale_factor / current_fraction)
39+
def _get_some_rows(cursor, fetchmany_size):
40+
row = cursor.fetchone()
41+
if row:
42+
return [row]
43+
else:
44+
return None

0 commit comments

Comments (0)