Skip to content

Commit 6fa5b43

Browse files
scodejpinner-lyft
authored andcommitted
Add migration support for LegacyBooleanAttribute. Fixes pynamodb#404 (pynamodb#405)
1 parent 31e44f4 commit 6fa5b43

File tree

7 files changed

+315
-2
lines changed

7 files changed

+315
-2
lines changed

.travis.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@ before_install:
1717
install:
1818
- pip install -r requirements-dev.txt
1919

20+
before_script:
21+
- wget http://dynamodb-local.s3-website-us-west-2.amazonaws.com/dynamodb_local_latest.tar.gz -O /tmp/dynamodb_local_latest.tar.gz
22+
- tar -xzf /tmp/dynamodb_local_latest.tar.gz -C /tmp
23+
- java -Djava.library.path=/tmp/DynamoDBLocal_lib -jar /tmp/DynamoDBLocal.jar -inMemory -port 8000 &
24+
- sleep 2
25+
2026
script:
2127
- py.test --cov-report term-missing --cov=pynamodb pynamodb/tests/
2228

docs/api.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,9 @@ Exceptions
3232
.. autoexception:: pynamodb.exceptions.TableError
3333
.. autoexception:: pynamodb.exceptions.TableDoesNotExist
3434
.. autoexception:: pynamodb.exceptions.DoesNotExist
35+
36+
Migration API
37+
-------------
38+
39+
.. automodule:: pynamodb.migration
40+
:members:

docs/contributing.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,9 @@ by using ``tox``::
2424

2525
Once you've opened a pull request on GitHub, Travis-ci will run the test suite as well.
2626

27+
By default, certain tests that require a running instance of `DynamoDB Local
28+
<http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/DynamoDBLocal.html>`_ will
29+
not be executed by tox. They will always be executed in Travis-ci. If you wish to run them locally,
30+
edit ``tox.ini`` to not pass ``-m 'not ddblocal'`` to ``py.test``.
31+
2732
Don't forget to add yourself to `AUTHORS.rst <https://github.com/pynamodb/PynamoDB/blob/devel/AUTHORS.rst>`_.

pynamodb/migration.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
"""
2+
Contains helpers to assist in "migrations" from one version of
3+
PynamoDB to the next, in cases where breaking changes have happened.
4+
"""
5+
6+
import logging
7+
8+
from botocore.exceptions import ClientError
9+
from pynamodb.exceptions import UpdateError
10+
from pynamodb.expressions.operand import Path
11+
12+
log = logging.getLogger(__name__)
13+
14+
15+
def _build_lba_filter_condition(attribute_names):
    """
    Build the scan filter used to find items that may need migrating.

    The result is the logical OR of ``Path(name).is_type('N')`` over every
    name in ``attribute_names``, i.e. it matches an item when at least one of
    the named attributes is stored with the native DynamoDB type 'N'.

    :param attribute_names: Iterable of attribute name strings.
    :return: The combined condition, or ``None`` when ``attribute_names`` is empty.
    """
    combined = None
    for name in attribute_names:
        is_number = Path(name).is_type('N')
        if combined is None:
            # First attribute: nothing to OR against yet.
            combined = is_number
            continue
        combined |= is_number

    return combined
29+
30+
31+
def migrate_boolean_attributes(model_class,
                               attribute_names,
                               read_capacity_to_consume_per_second=10,
                               allow_rate_limited_scan_without_consumed_capacity=False,
                               mock_conditional_update_failure=False):
    """
    Migrates boolean attributes per GitHub `issue 404 <https://github.com/pynamodb/PynamoDB/issues/404>`_.

    Will scan through all objects and perform a conditional update
    against any items that store any of the given attribute names as
    integers. Rate limiting is performed by passing an appropriate
    value as ``read_capacity_to_consume_per_second`` (which defaults to
    something extremely conservative and slow).

    Note that updates require provisioned write capacity as
    well. Please see `the DynamoDB docs
    <http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/HowItWorks.ProvisionedThroughput.html>`_
    for more information. Keep in mind that there is not a simple 1:1
    mapping between provisioned read capacity and write capacity. Make
    sure they are balanced. A conservative calculation would assume
    that every object visited results in an update.

    The function will log at level ``INFO`` the final outcome, and the
    return values help identify how many items needed changing and how
    many of them succeeded. For example, if you had 10 items in the
    table and every one of them had an attribute that needed
    migration, and upon migration we had one item which failed the
    migration due to a concurrent update by another writer, the return
    value would be: ``(10, 1)``

    Suggesting that 9 were updated successfully.

    It is suggested that the migration step be re-run until the return
    value is ``(0, 0)``.

    :param model_class: The Model class for which you are migrating. This should
                        be the up-to-date Model class using a BooleanAttribute for
                        the relevant attributes.
    :param attribute_names: List of strings that signify the names of attributes which
                            are potentially in need of migration.
    :param read_capacity_to_consume_per_second: Passed along to the underlying
                                                `rate_limited_scan` and intended as
                                                the mechanism to rate limit progress. Please
                                                see notes above around write capacity.
    :param allow_rate_limited_scan_without_consumed_capacity: Passed along to `rate_limited_scan`; intended
                                                              to allow unit tests to pass against DynamoDB Local.
    :param mock_conditional_update_failure: Only used for unit testing. When True, the conditional update expression
                                            used internally is updated such that it is guaranteed to fail. This is
                                            meant to trigger the code path in boto, to allow us to unit test that
                                            we are jumping through appropriate hoops handling the resulting
                                            failure and distinguishing it from other failures.

    :raises ValueError: If a scanned item has no attribute with one of the given names, or
                        if one of the named attributes holds a non-None value that is not a ``bool``.
    :raises UpdateError: Re-raised when an update fails for any reason other than a
                         ``ConditionalCheckFailedException`` reported by botocore.

    :return: (number_of_items_in_need_of_update, number_of_them_that_failed_due_to_conditional_update)
    """
    log.info('migrating items; no progress will be reported until completed; this may take a while')
    num_items_with_actions = 0
    num_update_failures = 0

    for item in model_class.rate_limited_scan(_build_lba_filter_condition(attribute_names),
                                              read_capacity_to_consume_per_second=read_capacity_to_consume_per_second,
                                              allow_rate_limited_scan_without_consumed_capacity=allow_rate_limited_scan_without_consumed_capacity):
        actions = []
        condition = None
        for attr_name in attribute_names:
            if not hasattr(item, attr_name):
                raise ValueError('attribute {0} does not exist on model'.format(attr_name))
            old_value = getattr(item, attr_name)
            if old_value is None:
                continue
            if not isinstance(old_value, bool):
                raise ValueError('attribute {0} does not appear to be a boolean attribute'.format(attr_name))

            # Re-write the value through the model's current attribute definition.
            actions.append(getattr(model_class, attr_name).set(old_value))

            # Only update if the stored value is still the integer we just read,
            # so a concurrent writer's change is never clobbered.
            still_legacy = Path(attr_name) == (1 if old_value else 0)
            if condition is None:
                condition = still_legacy
            else:
                # The comparison must be built before it is AND-ed in: `&` binds
                # tighter than `==`, so `condition & Path(x) == v` would parse as
                # `(condition & Path(x)) == v`.
                condition = condition & still_legacy

        if not actions:
            continue

        if mock_conditional_update_failure:
            condition = condition & (Path('__bogus_mock_attribute') == 5)

        # Counted before the attempt: the first return value is "items that
        # needed an update", whether or not the update succeeds.
        num_items_with_actions += 1
        try:
            item.update(actions=actions, condition=condition)
        except UpdateError as e:
            if not isinstance(e.cause, ClientError):
                raise
            code = e.cause.response['Error'].get('Code')
            if code != 'ConditionalCheckFailedException':
                raise
            log.warning('conditional update failed (concurrent writes?) for object: %s (you will need to re-run migration)', item)
            num_update_failures += 1

    log.info('finished migrating; %s items required updates, %s failed due to racing writes and require re-running migration',
             num_items_with_actions, num_update_failures)
    return num_items_with_actions, num_update_failures
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
import os
2+
import pytest
3+
4+
from pynamodb.attributes import BooleanAttribute, LegacyBooleanAttribute, UnicodeAttribute
5+
from pynamodb.expressions.operand import Path
6+
from pynamodb.migration import migrate_boolean_attributes
7+
from pynamodb.models import Model
8+
9+
10+
@pytest.fixture()
def ddb_url():
    """Return the URL of the DynamoDB instance the integration tests talk to.

    The instance (for example DynamoDB Local, see
    http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/DynamoDBLocal.html)
    is not started here; it has to be running already.

    The URL is read from the PYNAMODB_INTEGRATION_TEST_DDB_URL environment
    variable, falling back to http://localhost:8000 when it is not set.
    """
    return os.getenv("PYNAMODB_INTEGRATION_TEST_DDB_URL", "http://localhost:8000")
24+
25+
26+
@pytest.mark.ddblocal
def test_migrate_boolean_attributes_upgrade_path(ddb_url):
    """An item written via LegacyBooleanAttribute is readable via BooleanAttribute once migrated."""

    class BAModel(Model):
        class Meta:
            table_name = 'migration_test_lba_to_ba'
            host = ddb_url
        id = UnicodeAttribute(hash_key=True)
        flag = BooleanAttribute(null=True)

    class LBAModel(Model):
        class Meta:
            table_name = 'migration_test_lba_to_ba'
            host = ddb_url
        id = UnicodeAttribute(hash_key=True)
        flag = LegacyBooleanAttribute(null=True)

    def count(results):
        # Exhaust the query results and report how many items came back.
        return sum(1 for _ in results)

    LBAModel.create_table(read_capacity_units=1, write_capacity_units=1)

    # Write a single "offending" item, stored as an integer by the legacy attribute.
    LBAModel('pkey', flag=True).save()
    # NOTE: `== True` builds a query condition here; it is not a truthiness check.
    assert count(LBAModel.query('pkey', LBAModel.flag == True)) == 1

    # The new-style attribute must NOT match the legacy item ...
    assert count(BAModel.query('pkey', BAModel.flag == True)) == 0

    # ... although comparing the raw path against the integer does.
    assert count(BAModel.query('pkey', Path('flag') == 1)) == 1

    # Convert the stored value to a native boolean.
    outcome = migrate_boolean_attributes(BAModel, ['flag'], allow_rate_limited_scan_without_consumed_capacity=True)
    assert outcome == (1, 0)

    # After migration the new-style attribute matches ...
    assert count(BAModel.query('pkey', BAModel.flag == True)) == 1

    # ... and so does the raw path compared against a boolean.
    assert count(BAModel.query('pkey', Path('flag') == True)) == 1
62+
63+
64+
@pytest.mark.ddblocal
def test_migrate_boolean_attributes_none_okay(ddb_url):
    """An item whose flag is None requires no update and causes no failure."""

    class LBAModel(Model):
        class Meta:
            table_name = 'migration_test_lba_to_ba'
            host = ddb_url
        id = UnicodeAttribute(hash_key=True)
        flag = LegacyBooleanAttribute(null=True)

    LBAModel.create_table(read_capacity_units=1, write_capacity_units=1)
    LBAModel('pkey', flag=None).save()

    outcome = migrate_boolean_attributes(LBAModel, ['flag'], allow_rate_limited_scan_without_consumed_capacity=True)
    assert outcome == (0, 0)
77+
78+
79+
@pytest.mark.ddblocal
def test_migrate_boolean_attributes_conditional_update_failure(ddb_url):
    """Ensure a failed conditional update is counted as a failure instead of being raised.

    ``mock_conditional_update_failure=True`` forces the conditional update to
    fail, so the single item is reported both as needing an update and as
    having failed: ``(1, 1)``.
    """
    class LBAModel(Model):
        class Meta:
            table_name = 'migration_test_lba_to_ba'
            host = ddb_url
        id = UnicodeAttribute(hash_key=True)
        flag = LegacyBooleanAttribute(null=True)

    LBAModel.create_table(read_capacity_units=1, write_capacity_units=1)
    LBAModel('pkey', flag=1).save()
    assert (1, 1) == migrate_boolean_attributes(LBAModel, ['flag'],
                                                allow_rate_limited_scan_without_consumed_capacity=True,
                                                mock_conditional_update_failure=True)
94+
95+
96+
@pytest.mark.ddblocal
def test_migrate_boolean_attributes_missing_attribute(ddb_url):
    """Naming an attribute the model does not define raises ValueError."""

    class LBAModel(Model):
        class Meta:
            table_name = 'migration_test_lba_to_ba'
            host = ddb_url
        id = UnicodeAttribute(hash_key=True)
        flag = LegacyBooleanAttribute(null=True)

    LBAModel.create_table(read_capacity_units=1, write_capacity_units=1)
    LBAModel('pkey', flag=True).save()

    requested = ['flag', 'bogus']  # 'bogus' is not declared on LBAModel
    with pytest.raises(ValueError) as excinfo:
        migrate_boolean_attributes(LBAModel, requested, allow_rate_limited_scan_without_consumed_capacity=True)
    assert 'does not exist on model' in str(excinfo.value)
110+
111+
112+
@pytest.mark.ddblocal
def test_migrate_boolean_attributes_wrong_attribute_type(ddb_url):
    """Naming an attribute that holds a non-boolean value raises ValueError."""

    class LBAModel(Model):
        class Meta:
            table_name = 'migration_test_lba_to_ba'
            host = ddb_url
        id = UnicodeAttribute(hash_key=True)
        flag = LegacyBooleanAttribute(null=True)
        other = UnicodeAttribute(null=True)

    LBAModel.create_table(read_capacity_units=1, write_capacity_units=1)
    LBAModel('pkey', flag=True, other='test').save()

    requested = ['flag', 'other']  # 'other' holds a string, not a boolean
    with pytest.raises(ValueError) as excinfo:
        migrate_boolean_attributes(LBAModel, requested, allow_rate_limited_scan_without_consumed_capacity=True)
    assert 'does not appear to be a boolean attribute' in str(excinfo.value)
127+
128+
129+
@pytest.mark.ddblocal
def test_migrate_boolean_attributes_multiple_attributes(ddb_url):
    """Migrating two attribute names at once updates an item where only one of them is set."""

    class LBAModel(Model):
        class Meta:
            table_name = 'migration_test_lba_to_ba'
            host = ddb_url
        id = UnicodeAttribute(hash_key=True)
        flag = LegacyBooleanAttribute(null=True)
        flag2 = LegacyBooleanAttribute(null=True)

    LBAModel.create_table(read_capacity_units=1, write_capacity_units=1)

    # Deliberately one None and one True, rather than two Trues.
    LBAModel('pkey', flag=None, flag2=True).save()

    outcome = migrate_boolean_attributes(LBAModel, ['flag', 'flag2'], allow_rate_limited_scan_without_consumed_capacity=True)
    assert outcome == (1, 0)
143+
144+
145+
@pytest.mark.ddblocal
def test_migrate_boolean_attributes_skip_native_booleans(ddb_url):
    """An item already written through BooleanAttribute is not reported as needing an update."""

    class BAModel(Model):
        class Meta:
            table_name = 'migration_test_lba_to_ba'
            host = ddb_url
        id = UnicodeAttribute(hash_key=True)
        flag = BooleanAttribute(null=True)

    BAModel.create_table(read_capacity_units=1, write_capacity_units=1)
    BAModel('pkey', flag=True).save()

    outcome = migrate_boolean_attributes(BAModel, ['flag'], allow_rate_limited_scan_without_consumed_capacity=True)
    assert outcome == (0, 0)

requirements-dev.txt

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,13 @@
1-
-rrequirements.txt
1+
# It turns out we "actually" require >=1.6.0 for folks on Python 3.6 because of:
2+
#
3+
# https://github.com/boto/botocore/issues/1079
4+
#
5+
# This was discovered as a result of integration tests that exercised boto. However, until 4.x
6+
# we didn't want to bump the actual dependency of the library for consumers as it would effectively
7+
# be a breaking change. As a result, we use the 1.6.0 dependency for development here for the
8+
# purpose of integration tests, even though requirements.txt still has 1.2.0.
9+
botocore==1.6.0
10+
six==1.9.0
211
coverage==3.7.1
312
mock==2.0.0
413
pytest==3.1.1

tox.ini

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,7 @@ envlist = py26,py27,py33,py34,py35,pypy
66

77
[testenv]
88
deps = -rrequirements-dev.txt
9-
commands = py.test pynamodb/tests
9+
; To include integration tests that require DynamoDB Local,
10+
; use this instead:
11+
;commands = py.test pynamodb/tests
12+
commands = py.test -m 'not ddblocal' pynamodb/tests

0 commit comments

Comments
 (0)