Skip to content

Commit

Permalink
fix: updates tests based on revised hacker_news tables (#1591)
Browse files Browse the repository at this point in the history
This fixes four system tests that broke after an unexpected change to the Hacker News Google public dataset (`bigquery-public-data.hacker_news`).

The `comments` table was deleted, leaving only the `full` table.

This edit updates the table name in four tests (`comments` → `full`), renames the queried columns to match the `full` table's schema (`time_ts` → `timestamp`, `author` → `by`), and updates the expected results for one of the tests.

Fixes #1590 🦕
  • Loading branch information
chalmerlowe authored Jun 21, 2023
1 parent 41799b4 commit d73cf49
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 24 deletions.
29 changes: 15 additions & 14 deletions tests/system/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1706,8 +1706,8 @@ def test_dbapi_fetch_w_bqstorage_client_large_result_set(self):

cursor.execute(
"""
SELECT id, `by`, time_ts
FROM `bigquery-public-data.hacker_news.comments`
SELECT id, `by`, timestamp
FROM `bigquery-public-data.hacker_news.full`
ORDER BY `id` ASC
LIMIT 100000
"""
Expand All @@ -1717,27 +1717,28 @@ def test_dbapi_fetch_w_bqstorage_client_large_result_set(self):

field_name = operator.itemgetter(0)
fetched_data = [sorted(row.items(), key=field_name) for row in result_rows]

# Since DB API is not thread safe, only a single result stream should be
# requested by the BQ storage client, meaning that results should arrive
# in the sorted order.

expected_data = [
[
("by", "sama"),
("id", 15),
("time_ts", datetime.datetime(2006, 10, 9, 19, 51, 1, tzinfo=UTC)),
("by", "pg"),
("id", 1),
("timestamp", datetime.datetime(2006, 10, 9, 18, 21, 51, tzinfo=UTC)),
],
[
("by", "pg"),
("id", 17),
("time_ts", datetime.datetime(2006, 10, 9, 19, 52, 45, tzinfo=UTC)),
("by", "phyllis"),
("id", 2),
("timestamp", datetime.datetime(2006, 10, 9, 18, 30, 28, tzinfo=UTC)),
],
[
("by", "pg"),
("id", 22),
("time_ts", datetime.datetime(2006, 10, 10, 2, 18, 22, tzinfo=UTC)),
("by", "phyllis"),
("id", 3),
("timestamp", datetime.datetime(2006, 10, 9, 18, 40, 33, tzinfo=UTC)),
],
]

self.assertEqual(fetched_data, expected_data)

def test_dbapi_dry_run_query(self):
Expand Down Expand Up @@ -1769,8 +1770,8 @@ def test_dbapi_connection_does_not_leak_sockets(self):

cursor.execute(
"""
SELECT id, `by`, time_ts
FROM `bigquery-public-data.hacker_news.comments`
SELECT id, `by`, timestamp
FROM `bigquery-public-data.hacker_news.full`
ORDER BY `id` ASC
LIMIT 100000
"""
Expand Down
20 changes: 10 additions & 10 deletions tests/system/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -740,21 +740,21 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats(

def test_query_results_to_dataframe(bigquery_client):
QUERY = """
SELECT id, author, time_ts, dead
FROM `bigquery-public-data.hacker_news.comments`
SELECT id, `by`, timestamp, dead
FROM `bigquery-public-data.hacker_news.full`
LIMIT 10
"""

df = bigquery_client.query(QUERY).result().to_dataframe()

assert isinstance(df, pandas.DataFrame)
assert len(df) == 10 # verify the number of rows
column_names = ["id", "author", "time_ts", "dead"]
column_names = ["id", "by", "timestamp", "dead"]
assert list(df) == column_names # verify the column names
exp_datatypes = {
"id": int,
"author": str,
"time_ts": pandas.Timestamp,
"by": str,
"timestamp": pandas.Timestamp,
"dead": bool,
}
for _, row in df.iterrows():
Expand All @@ -766,8 +766,8 @@ def test_query_results_to_dataframe(bigquery_client):

def test_query_results_to_dataframe_w_bqstorage(bigquery_client):
query = """
SELECT id, author, time_ts, dead
FROM `bigquery-public-data.hacker_news.comments`
SELECT id, `by`, timestamp, dead
FROM `bigquery-public-data.hacker_news.full`
LIMIT 10
"""

Expand All @@ -779,12 +779,12 @@ def test_query_results_to_dataframe_w_bqstorage(bigquery_client):

assert isinstance(df, pandas.DataFrame)
assert len(df) == 10 # verify the number of rows
column_names = ["id", "author", "time_ts", "dead"]
column_names = ["id", "by", "timestamp", "dead"]
assert list(df) == column_names
exp_datatypes = {
"id": int,
"author": str,
"time_ts": pandas.Timestamp,
"by": str,
"timestamp": pandas.Timestamp,
"dead": bool,
}
for index, row in df.iterrows():
Expand Down

0 comments on commit d73cf49

Please sign in to comment.