Skip to content

Commit

Permalink
feat: support RANGE in queries Part 1: JSON (#1884)
Browse files Browse the repository at this point in the history
* feat: support range in queries as dict

* fix sys tests

* lint

* fix typo
  • Loading branch information
Linchin authored Apr 10, 2024
1 parent d08ca70 commit 3634405
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 4 deletions.
41 changes: 41 additions & 0 deletions google/cloud/bigquery/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,46 @@ def _json_from_json(value, field):
return None


def _range_element_from_json(value, field):
    """Coerce one bound of a range literal to its Python value.

    Args:
        value (str): A single bound taken from a range literal, or the
            sentinel ``"UNBOUNDED"``.
        field: Object describing the element type of the range. Only its
            ``element_type`` attribute is read.

    Returns:
        ``None`` when ``value`` is ``"UNBOUNDED"``; otherwise the result of
        ``_date_from_json``, ``_datetime_from_json`` or
        ``_timestamp_from_json``, chosen by ``field.element_type``.

    Raises:
        ValueError: If ``field.element_type`` is not ``"DATE"``,
            ``"DATETIME"`` or ``"TIMESTAMP"``.
    """
    # An unbounded side has no value to parse, whatever the element type.
    if value == "UNBOUNDED":
        return None

    element_type = field.element_type
    if element_type == "DATE":
        return _date_from_json(value, None)
    if element_type == "DATETIME":
        return _datetime_from_json(value, None)
    if element_type == "TIMESTAMP":
        return _timestamp_from_json(value, None)

    # Report the offending element type rather than the bound being parsed,
    # so the message says what is actually unsupported.
    raise ValueError(f"Unsupported range field type: {element_type}")


def _range_from_json(value, field):
    """Coerce 'value' to a range, if set or not nullable.

    Args:
        value (str): The literal representation of the range, in the form
            ``"[<start>, <end>)"``.
        field (google.cloud.bigquery.schema.SchemaField):
            The field corresponding to the value. Its
            ``range_element_type`` is used to parse both bounds.

    Returns:
        Optional[dict]:
            A dict with ``"start"`` and ``"end"`` keys parsed from
            ``value`` if ``_not_null(value, field)`` holds (otherwise it is
            :data:`None`).

    Raises:
        ValueError: If ``value`` is not of the form ``"[<start>, <end>)"``
            or a bound cannot be parsed for the element type.
    """
    if not _not_null(value, field):
        return None

    # fullmatch (rather than match) also anchors the end of the string, so
    # text trailing the closing ")" is rejected here instead of being
    # silently folded into the end bound by the slice below.
    if re.fullmatch(r"\[.*, .*\)", value) is None:
        raise ValueError(f"Unknown range format: {value}")

    # Strip the surrounding "[" and ")" and separate the two bounds.
    bounds = value[1:-1].split(", ")
    if len(bounds) != 2:
        # More than one ", " separator: not a two-sided range literal.
        raise ValueError(f"Unknown range format: {value}")

    start, end = bounds
    element_field = field.range_element_type
    return {
        "start": _range_element_from_json(start, element_field),
        "end": _range_element_from_json(end, element_field),
    }


# Parse BigQuery API response JSON into a Python representation.
_CELLDATA_FROM_JSON = {
"INTEGER": _int_from_json,
Expand All @@ -329,6 +369,7 @@ def _json_from_json(value, field):
"TIME": _time_from_json,
"RECORD": _record_from_json,
"JSON": _json_from_json,
"RANGE": _range_from_json,
}

_QUERY_PARAMS_FROM_JSON = dict(_CELLDATA_FROM_JSON)
Expand Down
5 changes: 5 additions & 0 deletions tests/system/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
_naive = datetime.datetime(2016, 12, 5, 12, 41, 9)
_naive_microseconds = datetime.datetime(2016, 12, 5, 12, 41, 9, 250000)
_stamp = "%s %s" % (_naive.date().isoformat(), _naive.time().isoformat())
_date = _naive.date().isoformat()
_stamp_microseconds = _stamp + ".250000"
_zoned = _naive.replace(tzinfo=UTC)
_zoned_microseconds = _naive_microseconds.replace(tzinfo=UTC)
Expand Down Expand Up @@ -78,6 +79,10 @@
),
("SELECT ARRAY(SELECT STRUCT([1, 2]))", [{"_field_1": [1, 2]}]),
("SELECT ST_GeogPoint(1, 2)", "POINT(1 2)"),
(
"SELECT RANGE<DATE> '[UNBOUNDED, %s)'" % _date,
{"start": None, "end": _naive.date()},
),
]


Expand Down
6 changes: 3 additions & 3 deletions tests/system/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ def test_query_statistics(bigquery_client, query_api_method):
),
(
"SELECT @range_date",
"[2016-12-05, UNBOUNDED)",
{"end": None, "start": datetime.date(2016, 12, 5)},
[
RangeQueryParameter(
name="range_date",
Expand All @@ -436,7 +436,7 @@ def test_query_statistics(bigquery_client, query_api_method):
),
(
"SELECT @range_datetime",
"[2016-12-05T00:00:00, UNBOUNDED)",
{"end": None, "start": datetime.datetime(2016, 12, 5, 0, 0)},
[
RangeQueryParameter(
name="range_datetime",
Expand All @@ -447,7 +447,7 @@ def test_query_statistics(bigquery_client, query_api_method):
),
(
"SELECT @range_unbounded",
"[UNBOUNDED, UNBOUNDED)",
{"end": None, "start": None},
[
RangeQueryParameter(
name="range_unbounded",
Expand Down
105 changes: 104 additions & 1 deletion tests/unit/test__helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,99 @@ def test_w_bogus_string_value(self):
self._call_fut("12:12:27.123", object())


class Test_range_from_json(unittest.TestCase):
    """Unit tests for ``_helpers._range_from_json``."""

    @staticmethod
    def _make_range_field(element_type):
        """Return a nullable RANGE field stub with the given element type."""
        element = _Field("NULLABLE", element_type=element_type)
        return _Field(
            "NULLABLE",
            field_type="RANGE",
            range_element_type=element,
        )

    def _call_fut(self, value, field):
        # Imported lazily so a broken import fails the test, not collection.
        from google.cloud.bigquery import _helpers

        return _helpers._range_from_json(value, field)

    def test_w_none_nullable(self):
        result = self._call_fut(None, _Field("NULLABLE"))
        self.assertIsNone(result)

    def test_w_none_required(self):
        required = _Field("REQUIRED")
        with self.assertRaises(TypeError):
            self._call_fut(None, required)

    def test_w_wrong_format(self):
        # No ", " separator between the two bounds.
        date_range = self._make_range_field("DATE")
        with self.assertRaises(ValueError):
            self._call_fut("[2009-06-172019-06-17)", date_range)

    def test_w_wrong_element_type(self):
        # TIME is not a supported range element type.
        time_range = self._make_range_field("TIME")
        with self.assertRaises(ValueError):
            self._call_fut("[15:31:38, 15:50:38)", time_range)

    def test_w_unbounded_value(self):
        date_range = self._make_range_field("DATE")
        expected = {"start": None, "end": datetime.date(2019, 6, 17)}

        coerced = self._call_fut("[UNBOUNDED, 2019-06-17)", date_range)

        self.assertEqual(coerced, expected)

    def test_w_date_value(self):
        date_range = self._make_range_field("DATE")
        expected = {
            "start": datetime.date(2009, 6, 17),
            "end": datetime.date(2019, 6, 17),
        }

        coerced = self._call_fut("[2009-06-17, 2019-06-17)", date_range)

        self.assertEqual(coerced, expected)

    def test_w_datetime_value(self):
        datetime_range = self._make_range_field("DATETIME")
        expected = {
            "start": datetime.datetime(2009, 6, 17, 13, 45, 30),
            "end": datetime.datetime(2019, 6, 17, 13, 45, 30),
        }

        coerced = self._call_fut(
            "[2009-06-17T13:45:30, 2019-06-17T13:45:30)", datetime_range
        )

        self.assertEqual(coerced, expected)

    def test_w_timestamp_value(self):
        from google.cloud._helpers import _EPOCH

        timestamp_range = self._make_range_field("TIMESTAMP")
        expected = {
            "start": _EPOCH + datetime.timedelta(seconds=1, microseconds=234567),
            "end": _EPOCH + datetime.timedelta(seconds=1, microseconds=234789),
        }

        coerced = self._call_fut("[1234567, 1234789)", timestamp_range)

        self.assertEqual(coerced, expected)


class Test_record_from_json(unittest.TestCase):
def _call_fut(self, value, field):
from google.cloud.bigquery._helpers import _record_from_json
Expand Down Expand Up @@ -1323,11 +1416,21 @@ def test_w_str(self):


class _Field(object):
    """Minimal stand-in for a schema field, used by the helper tests.

    It only stores the constructor arguments as same-named attributes.
    """

    def __init__(
        self,
        mode,
        name="unknown",
        field_type="UNKNOWN",
        fields=(),
        range_element_type=None,
        element_type=None,
    ):
        self.mode = mode
        self.name = name
        self.field_type = field_type
        self.fields = fields
        # Set on RANGE field stubs: another _Field describing the elements.
        self.range_element_type = range_element_type
        # Set on the element stub itself, e.g. "DATE" or "TIMESTAMP".
        self.element_type = element_type


def _field_isinstance_patcher():
Expand Down

0 comments on commit 3634405

Please sign in to comment.