Skip to content

Commit f7142c3

Browse files
committed
Documentation for auto paging
1 parent 6d437f0 commit f7142c3

5 files changed

Lines changed: 141 additions & 3 deletions

File tree

cassandra/cluster.py

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
This module houses the main classes you will interact with,
33
:class:`.Cluster` and :class:`.Session`.
44
"""
5+
from __future__ import absolute_import
56

67
from concurrent.futures import ThreadPoolExecutor
78
import logging
@@ -147,8 +148,15 @@ class Cluster(object):
147148
server will be automatically used.
148149
"""
149150

150-
# TODO: docs
151151
protocol_version = 2
152+
"""
153+
The version of the native protocol to use. The protocol version 2
154+
adds support for lightweight transactions, batch operations, and
155+
automatic query paging, but is only supported by Cassandra 2.0+. When
156+
working with Cassandra 1.2, this must be set to 1. You can also set
157+
this to 1 when working with Cassandra 2.0+, but features that require
158+
the version 2 protocol will not be enabled.
159+
"""
152160

153161
compression = True
154162
"""
@@ -941,10 +949,10 @@ class Session(object):
941949
default_fetch_size = 5000
942950
"""
943951
By default, this many rows will be fetched at a time. This can be
944-
specified per-query through :attr:`~Statement.fetch_size`.
952+
specified per-query through :attr:`.Statement.fetch_size`.
945953
946954
This only takes effect when protocol version 2 or higher is used.
947-
See :attr:`~Cluster.protocol_version` for details.
955+
See :attr:`.Cluster.protocol_version` for details.
948956
"""
949957

950958
_lock = None
@@ -1970,9 +1978,21 @@ def _query(self, host, message=None, cb=None):
19701978

19711979
@property
19721980
def has_more_pages(self):
1981+
"""
1982+
Returns :const:`True` if there are more pages left in the
1983+
query results, :const:`False` otherwise. This should only
1984+
be checked after the first page has been returned.
1985+
"""
19731986
return self._paging_state is not None
19741987

19751988
def start_fetching_next_page(self):
1989+
"""
1990+
If there are more pages left in the query result, this asynchronously
1991+
starts fetching the next page. If there are no pages left, :exc:`.QueryExhausted`
1992+
is raised. Also see :attr:`.has_more_pages`.
1993+
1994+
This should only be called after the first page has been returned.
1995+
"""
19761996
if not self._paging_state:
19771997
raise QueryExhausted()
19781998

@@ -2388,10 +2408,33 @@ def __str__(self):
23882408

23892409

23902410
class QueryExhausted(Exception):
2411+
"""
2412+
Raised when :meth:`.ResponseFuture.start_fetching_next_page()` is called and
2413+
there are no more pages. You can check :attr:`.ResponseFuture.has_more_pages`
2414+
before calling to avoid this.
2415+
"""
23912416
pass
23922417

23932418

23942419
class PagedResult(object):
2420+
"""
2421+
An iterator over the rows from a paged query result. Whenever the number
2422+
of result rows for a query exceeds the :attr:`~.query.Statement.fetch_size`
2423+
(or :attr:`~.Session.default_fetch_size`, if not set) an instance of this
2424+
class will be returned.
2425+
2426+
You can treat this as a normal iterator over rows::
2427+
2428+
>>> from cassandra.query import SimpleStatement
2429+
>>> statement = SimpleStatement("SELECT * FROM users", fetch_size=10)
2430+
>>> for user_row in session.execute(statement):
2431+
... process_user(user_row)
2432+
2433+
Whenever there are no more rows in the current page, the next page will
2434+
be fetched transparently. However, note that it *is* possible for
2435+
an :class:`Exception` to be raised while fetching the next page, just
2436+
like you might see on a normal call to ``session.execute()``.
2437+
"""
23952438

23962439
def __init__(self, response_future, initial_response):
23972440
self.response_future = response_future

cassandra/query.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,13 @@ class Statement(object):
7272
"""
7373

7474
fetch_size = None
75+
"""
76+
How many rows will be fetched at a time. This overrides the default
77+
of :attr:`.Session.default_fetch_size`.
78+
79+
This only takes effect when protocol version 2 or higher is used.
80+
See :attr:`.Cluster.protocol_version` for details.
81+
"""
7582

7683
_serial_consistency_level = None
7784
_routing_key = None

docs/api/cassandra/cluster.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
.. autoattribute:: cql_version
99

10+
.. autoattribute:: protocol_version
11+
1012
.. autoattribute:: port
1113

1214
.. autoattribute:: compression
@@ -59,6 +61,8 @@
5961

6062
.. autoattribute:: row_factory
6163

64+
.. autoattribute:: default_fetch_size
65+
6266
.. automethod:: execute(statement[, parameters][, timeout][, trace])
6367

6468
.. automethod:: execute_async(statement[, parameters][, trace])
@@ -77,11 +81,20 @@
7781

7882
.. automethod:: get_query_trace()
7983

84+
.. autoattribute:: has_more_pages
85+
86+
.. automethod:: start_fetching_next_page()
87+
8088
.. automethod:: add_callback(fn, *args, **kwargs)
8189

8290
.. automethod:: add_errback(fn, *args, **kwargs)
8391

8492
.. automethod:: add_callbacks(callback, errback, callback_args=(), callback_kwargs=None, errback_args=(), errback_kwargs=None)
8593

94+
.. autoclass:: PagedResult ()
95+
:members:
96+
97+
.. autoexception:: QueryExhausted ()
98+
8699
.. autoexception:: NoHostAvailable ()
87100
:members:

docs/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ Python Cassandra Driver
88
installation
99
getting_started
1010
performance
11+
query_paging
1112

1213
Indices and Tables
1314
==================

docs/query_paging.rst

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
Paging Large Queries
2+
====================
3+
Cassandra 2.0+ offers support for automatic query paging. Starting with
4+
version 2.0 of the driver, if :attr:`~.Cluster.protocol_version` is set to
5+
:const:`2` (it is by default), queries returning large result sets will be
6+
automatically paged.
7+
8+
Controlling the Page Size
9+
-------------------------
10+
By default, :attr:`.Session.default_fetch_size` controls how many rows will
11+
be fetched per page. This can be overridden per-query by setting
12+
:attr:`~.fetch_size` on a :class:`~.Statement`. By default, each page
13+
will contain at most 5000 rows.
14+
15+
Handling Paged Results
16+
----------------------
17+
Whenever the number of result rows for a query exceeds the page size, an
18+
instance of :class:`~.PagedResult` will be returned instead of a normal
19+
list. This class implements the iterator interface, so you can treat
20+
it like a normal iterator over rows::
21+
22+
from cassandra.query import SimpleStatement
23+
query = "SELECT * FROM users" # users contains 100 rows
24+
statement = SimpleStatement(query, fetch_size=10)
25+
for user_row in session.execute(statement):
26+
process_user(user_row)
27+
28+
Whenever there are no more rows in the current page, the next page will
29+
be fetched transparently. However, note that it *is* possible for
30+
an :class:`Exception` to be raised while fetching the next page, just
31+
like you might see on a normal call to ``session.execute()``.
32+
33+
If you use :meth:`.Session.execute_async()` along with
34+
:meth:`.ResponseFuture.result()`, the first page will be fetched before
35+
:meth:`~.ResponseFuture.result()` returns, but later pages will be
36+
transparently fetched synchronously while iterating the result.
37+
38+
Handling Paged Results with Callbacks
39+
-------------------------------------
40+
If callbacks are attached to a query that returns a paged result,
41+
the callback will be called once per page with a normal list of rows.
42+
43+
Use :attr:`.ResponseFuture.has_more_pages` and
44+
:meth:`.ResponseFuture.start_fetching_next_page()` to continue fetching
45+
pages. For example::
46+
47+
class PagedResultHandler(object):
48+
49+
def __init__(self, future):
50+
self.error = None
51+
self.finished_event = Event()
52+
self.future = future
53+
self.future.add_callbacks(
54+
callback=self.handle_page,
55+
errback=self.handle_error)
56+
57+
def handle_page(self, rows):
58+
for row in rows:
59+
process_row(row)
60+
61+
if self.future.has_more_pages:
62+
self.future.start_fetching_next_page()
63+
else:
64+
self.finished_event.set()
65+
66+
def handle_error(self, exc):
67+
self.error = exc
68+
self.finished_event.set()
69+
70+
future = session.execute_async("SELECT * FROM users")
71+
handler = PagedResultHandler(future)
72+
handler.finished_event.wait()
73+
if handler.error:
74+
raise handler.error

0 commit comments

Comments
 (0)