# Copyright 2010 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The Person Finder data model, based on PFIF (http://zesty.ca/pfif)."""
from datetime import timedelta
from google.appengine.api import datastore_errors
from google.appengine.api import memcache
from google.appengine.ext import db
from six.moves.urllib import parse as urlparse
import urllib
import config
import full_text_search
import indexing
import pfif
import prefix
from const import HOME_DOMAIN, NOTE_STATUS_TEXT
# Default number of days before a record expires.
DEFAULT_EXPIRATION_DAYS = 40
# ==== PFIF record IDs =====================================================
def is_original(repo, record_id):
"""Returns True if this is a record_id for a record originally created in
the specified repository."""
try:
repo_id, local_id = record_id.split('/', 1)
return repo_id == repo + '.' + HOME_DOMAIN
except ValueError:
raise ValueError('%r is not a valid record_id' % record_id)
def is_clone(repo, record_id):
"""Returns True if this is a record_id for a clone record (a record created
in another repository and copied into the specified one)."""
return not is_original(repo, record_id)
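# Illustrative sketch (hypothetical values, assuming HOME_DOMAIN == 'example.org'):
# a record created in the 'haiti' repo gets a record_id whose domain part is
# 'haiti.example.org', so:
#   is_original('haiti', 'haiti.example.org/person.1234')  # -> True
#   is_original('haiti', 'other.domain.com/person.99')     # -> False
#   is_clone('haiti', 'other.domain.com/person.99')        # -> True
# A record_id with no '/' separator raises ValueError.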
def filter_by_prefix(query, key_name_prefix):
"""Filters a query for key_names that have the given prefix. If root_kind
is specified, filters the query for children of any entities that are of
that kind with the given prefix; otherwise, the results are assumed to be
top-level entities of the kind being queried."""
root_kind = query._model_class.__name__
min_key = db.Key.from_path(root_kind, key_name_prefix)
max_key = db.Key.from_path(root_kind, key_name_prefix + u'\uffff')
return query.filter('__key__ >=', min_key).filter('__key__ <=', max_key)
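# Illustrative sketch of how filter_by_prefix might be used (hypothetical repo
# name): bounding __key__ between the prefix and prefix + u'\uffff' selects
# every entity whose key name starts with that prefix, e.g. all records in the
# 'haiti' repo:
#   query = filter_by_prefix(Person.all(filter_expired=False), 'haiti:')
#   for person in query:
#       ...  # key names like 'haiti:haiti.example.org/person.1234'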
def get_properties_as_dict(db_obj):
"""Returns a dictionary containing all (dynamic)* properties of db_obj."""
properties = dict((k, v.__get__(db_obj, db_obj.__class__)) for
k, v in db_obj.properties().iteritems() if
v.__get__(db_obj, db_obj.__class__))
dynamic_properties = dict((prop, getattr(db_obj, prop)) for
prop in db_obj.dynamic_properties())
properties.update(dynamic_properties)
return properties
def clone_to_new_type(origin, dest_class, **kwargs):
"""Clones the given entity to a new entity of the type "dest_class".
Optionally, pass in values to kwargs to update values during cloning."""
vals = get_properties_as_dict(origin)
vals.update(**kwargs)
if hasattr(origin, 'record_id'):
vals.update(record_id=origin.record_id)
return dest_class(key_name=origin.key().name(), **vals)
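# Illustrative sketch (hypothetical usage): clone_to_new_type copies an
# entity's key name and property values into another model class, with any
# keyword arguments overriding the copied values, e.g.:
#   flagged = clone_to_new_type(note, NoteWithBadWords, spam_score=0.8)
#   flagged.put()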
# ==== Model classes =======================================================
# Every Person or Note entity belongs to a specific repository. To partition
# the datastore, key names consist of the repo name, a colon, and then the
# record ID. Each repository appears to be a separate instance of the app.
# Note that the repository name doesn't necessarily have to match the original
# repository domain in the record ID! For example, a person record created at
# foo.person-finder.appspot.com would have a key name such as:
#
# foo:foo.person-finder.appspot.com/person.234
#
# This record would be searchable only at foo.person-finder.appspot.com --
# each repository is independent. Copying it to bar.person-finder.appspot.com
# would produce a clone record with the key name:
#
# bar:foo.person-finder.appspot.com/person.234
#
# That is, the clone has the same record ID but a different repository name.
class Repo(db.Model):
"""Identifier for a repository of Person and Note records. This is a
top-level entity, with no parent, whose existence just indicates the
existence of a repository. Key name: unique repository name. In the UI,
each repository behaves like an independent instance of the application."""
class ActivationStatus(object):
"""An enum for the launch/activation status of the repo."""
# For use with repositories that have not yet been publicly launched.
# Staging repos aren't listed on the homepage or in the repository feed,
# but are otherwise totally usable repos for anyone who knows the URL.
STAGING = 0
# For repositories in active use. These repos are listed on the homepage
# and in the repository feed.
ACTIVE = 1
# For repositories that have been turned down. The repository is
# unavailable, through either the web interface or the API, and users
# will instead see a deactivation message. These repos aren't listed on
# the homepage or in the repository feed.
DEACTIVATED = 2
# TODO(nworden): actually use this field
activation_status = db.IntegerProperty(
required=False, default=ActivationStatus.STAGING)
# Whether the repository is in test mode; meant for use with evergreen
# repositories when they're not needed. Records for repos in test mode are
# automatically deleted after 24 hours.
test_mode = db.BooleanProperty(default=False)
# Few properties for now; the repository title and other settings are all in
# ConfigEntry entities (see config.py).
@staticmethod
def get(repo_id):
return Repo.get_by_key_name(repo_id)
@classmethod
def list(cls):
"""Returns a list of all repository names."""
return [repo.key().name() for repo in cls.all()]
@classmethod
def list_active(cls):
"""Returns a list of the active (non-deactivated) repository names."""
staging = [repo.key().name() for repo in Repo.all().filter(
'activation_status =', Repo.ActivationStatus.STAGING)]
active = [repo.key().name() for repo in Repo.all().filter(
'activation_status =', Repo.ActivationStatus.ACTIVE)]
return staging + active
@classmethod
def list_launched(cls):
"""Returns a list of the launched (listed in menu) repository names."""
return [repo.key().name() for repo in Repo.all().filter(
'activation_status =', Repo.ActivationStatus.ACTIVE)]
def is_deactivated(self):
return self.activation_status == Repo.ActivationStatus.DEACTIVATED
class Base(db.Model):
"""Base class providing methods common to both Person and Note entities,
whose key names are partitioned using the repo name as a prefix."""
# max records to fetch in one go.
FETCH_LIMIT = 200
# Even though the repo is part of the key_name, it is also stored
# redundantly as a separate property so it can be indexed and queried upon.
repo = db.StringProperty(required=True)
# We can't use an inequality filter on expiry_date (together with other
# inequality filters), so we use a periodic task to set the is_expired flag
# on expired records, and filter using the flag. A record's life cycle is:
#
# 1. Record is created with some expiry_date.
# 2. expiry_date passes.
# 3. tasks.DeleteExpired sets is_expired to True; record vanishes from UI.
# 4. delete.EXPIRED_TTL_DAYS days pass.
# 5. tasks.DeleteExpired wipes the record.
# We set default=False to ensure all entities are indexed by is_expired.
# NOTE: is_expired should ONLY be modified in Person.put_expiry_flags().
is_expired = db.BooleanProperty(required=False, default=False)
@classmethod
def all(cls, keys_only=False, filter_expired=True):
"""Returns a query for all records of this kind; by default this
filters out the records marked as expired.
Args:
keys_only - If true, return only the keys.
filter_expired - If true, omit records with is_expired == True.
Returns:
query - A Query object for the results.
"""
query = super(Base, cls).all(keys_only=keys_only)
if filter_expired:
query.filter('is_expired =', False)
return query
@classmethod
def all_in_repo(cls, repo, filter_expired=True):
"""Gets a query for all entities in a given repository."""
return cls.all(filter_expired=filter_expired).filter('repo =', repo)
def get_record_id(self):
"""Returns the record ID of this record."""
repo, record_id = self.key().name().split(':', 1)
return record_id
record_id = property(get_record_id)
def get_original_domain(self):
"""Returns the domain name of this record's original repository."""
return self.record_id.split('/', 1)[0]
original_domain = property(get_original_domain)
def is_original(self):
"""Returns True if this record was created in this repository."""
return is_original(self.repo, self.record_id)
def is_clone(self):
"""Returns True if this record was copied from another repository."""
return not self.is_original()
@classmethod
def get_key(cls, repo, record_id):
"""Get entity key from its record id"""
return db.Key.from_path(cls.kind(), repo + ':' + record_id)
@classmethod
def get_all(cls, repo, record_ids, limit=200):
"""Gets the entities with the given record_ids in a given repository."""
keys = [cls.get_key(repo, id) for id in record_ids]
return [record for record in db.get(keys) if record is not None]
@classmethod
def get(cls, repo, record_id, filter_expired=True):
"""Gets the entity with the given record_id in a given repository."""
record = cls.get_by_key_name(repo + ':' + record_id)
if record:
if not (filter_expired and record.is_expired):
return record
@classmethod
def create_original(cls, repo, **kwargs):
"""Creates a new original entity with the given field values."""
# TODO(ryok): Consider switching to URL-like record id format,
        # which is more consistent with the repo id format.
record_id = '%s.%s/%s.%d' % (
repo, HOME_DOMAIN, cls.__name__.lower(), UniqueId.create_id())
return cls(key_name=repo + ':' + record_id, repo=repo, **kwargs)
@classmethod
def create_clone(cls, repo, record_id, **kwargs):
"""Creates a new clone entity with the given field values."""
assert is_clone(repo, record_id)
return cls(key_name=repo + ':' + record_id, repo=repo, **kwargs)
# TODO(kpy): Rename this function (maybe to create_with_record_id?).
@classmethod
def create_original_with_record_id(cls, repo, record_id, **kwargs):
"""Creates an original entity with the given record_id and field
values, overwriting any existing entity with the same record_id.
This should be rarely used in practice (e.g. for an administrative
import into a home repository), hence the long method name."""
return cls(key_name=repo + ':' + record_id, repo=repo, **kwargs)
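    # Illustrative sketch of the resulting key names (hypothetical values,
    # assuming HOME_DOMAIN == 'example.org'):
    #   Person.create_original('haiti', ...)
    #       -> key name 'haiti:haiti.example.org/person.1234'
    #   Person.create_clone('haiti', 'other.org/person.99', ...)
    #       -> key name 'haiti:other.org/person.99'
    # i.e. the repo name, a colon, and the PFIF record ID.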
# All fields are either required, or have a default value. For property
# types that have a false value, the default is the false value. For types
# with no false value, the default is None.
class Person(Base):
"""The datastore entity kind for storing a PFIF person record. Never call
Person() directly; use Person.create_clone() or Person.create_original().
Methods that start with "get_" return actual values or lists of values;
other methods return queries or generators for values.
"""
# If you add any new fields, be sure they are handled in wipe_contents().
# entry_date should update every time a record is created or re-imported.
entry_date = db.DateTimeProperty(required=True)
expiry_date = db.DateTimeProperty(required=False)
author_name = db.StringProperty(default='', multiline=True)
author_email = db.StringProperty(default='')
author_phone = db.StringProperty(default='')
# the original date we saw this record; it should not change.
original_creation_date = db.DateTimeProperty(auto_now_add=True)
# source_date is the date that the original repository last changed
# any of the fields in the pfif record.
source_date = db.DateTimeProperty()
source_name = db.StringProperty(default='')
source_url = db.StringProperty(default='')
# TODO(ryok): consider marking this required.
full_name = db.StringProperty(multiline=True)
given_name = db.StringProperty()
family_name = db.StringProperty()
alternate_names = db.StringProperty(default='', multiline=True)
description = db.TextProperty(default='')
sex = db.StringProperty(default='', choices=pfif.PERSON_SEX_VALUES)
date_of_birth = db.StringProperty(default='') # YYYY, YYYY-MM, YYYY-MM-DD
age = db.StringProperty(default='') # NN or NN-MM
home_street = db.StringProperty(default='')
home_neighborhood = db.StringProperty(default='')
home_city = db.StringProperty(default='')
home_state = db.StringProperty(default='')
home_postal_code = db.StringProperty(default='')
home_country = db.StringProperty(default='')
photo_url = db.TextProperty(default='')
profile_urls = db.TextProperty(default='')
# This reference points to a locally stored Photo entity. ONLY set this
# property when storing a new Photo object that is owned by this Person
# record and can be safely deleted when the Person is deleted.
photo = db.ReferenceProperty(default=None)
# The following properties are not part of the PFIF data model; they are
# cached on the Person for efficiency.
# Value of the 'status' and 'source_date' properties on the Note
# with the latest source_date with the 'status' field present.
latest_status = db.StringProperty(default='')
latest_status_source_date = db.DateTimeProperty()
# Value of the 'author_made_contact' and 'source_date' properties on the
# Note with the latest source_date with the 'author_made_contact' field
# present.
latest_found = db.BooleanProperty()
latest_found_source_date = db.DateTimeProperty()
# Last write time of this Person or any Notes on this Person.
# This reflects any change to the Person page.
last_modified = db.DateTimeProperty(auto_now=True)
# This flag is set to true only when the record author disabled
# adding new notes to a record.
notes_disabled = db.BooleanProperty(default=False)
# attributes used by indexing.py
names_prefixes = db.StringListProperty()
# TODO(ryok): index address components.
_fields_to_index_properties = ['given_name', 'family_name', 'full_name']
_fields_to_index_by_prefix_properties = ['given_name', 'family_name',
'full_name']
@staticmethod
def past_due_records(repo):
"""Returns a query for all Person records with expiry_date in the past,
or None, regardless of their is_expired flags."""
import utils
return Person.all(filter_expired=False).filter(
'expiry_date <=', utils.get_utcnow()).filter(
'repo =', repo)
@staticmethod
def potentially_expired_records(repo,
days_to_expire=DEFAULT_EXPIRATION_DAYS):
"""Returns a query for all Person records with source date
older than days_to_expire (or empty source_date), regardless of
        the value of the is_expired flag."""
import utils
cutoff_date = utils.get_utcnow() - timedelta(days_to_expire)
return Person.all(filter_expired=False).filter(
            'source_date <=', cutoff_date).filter(
'repo =', repo)
@property
def person_record_id(self):
return self.record_id
@property
def primary_full_name(self):
return self.full_name.splitlines()[0] if self.full_name else ''
@property
def full_name_list(self):
return self.full_name.splitlines() if self.full_name else []
@property
def alternate_names_list(self):
return self.alternate_names.splitlines() if self.alternate_names else []
@property
def fuzzified_age(self):
import utils
return utils.fuzzify_age(self.age) if self.age else None
@property
def profile_urls_list(self):
return self.profile_urls.splitlines() if self.profile_urls else []
@property
def photo_url_no_scheme(self):
import utils
return utils.strip_url_scheme(self.photo_url)
def photo_is_local(self, request_url):
# TODO(nworden): consider setting the acceptable domain in
# site_settings.py, so that we don't have to pass a request URL in. It's
# not obvious how to account for different App Engine versions without
# it being a hassle, but shouldn't be too hard.
if not self.photo_url:
return False
else:
_, our_netloc, _, _, _ = urlparse.urlsplit(request_url)
_, photo_netloc, _, _, _ = urlparse.urlsplit(self.photo_url)
return photo_netloc == our_netloc
URL_PARSE_QUERY_INDEX = 4
# TODO(nworden): if/when we make photo_is_local a property (that doesn't
    # need the request_url argument), make this just a property too. I only have
# it as a static method because I wouldn't want someone to accidentally try
# to use it when the URL isn't local.
@staticmethod
def get_thumbnail_url(local_photo_url):
parsed_url = list(urlparse.urlparse(local_photo_url))
params_dict = dict(urlparse.parse_qsl(
parsed_url[Person.URL_PARSE_QUERY_INDEX]))
params_dict['thumb'] = 'true'
parsed_url[Person.URL_PARSE_QUERY_INDEX] = urllib.urlencode(
params_dict)
return urlparse.urlunparse(parsed_url)
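    # Illustrative sketch (hypothetical local photo URL): the method just adds
    # thumb=true to the query string, e.g.
    #   Person.get_thumbnail_url('/haiti/photo?id=123')
    #       -> '/haiti/photo?id=123&thumb=true'
    # (parameter order may vary, since the query string is rebuilt from a dict).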
def get_notes(self, filter_expired=True):
"""Returns a list of all the Notes on this Person, omitting expired
Notes by default."""
return Note.get_by_person_record_id(
self.repo, self.record_id, filter_expired=filter_expired)
@property
def unexpired_notes(self):
try:
return self.get_notes()
except datastore_errors.NeedIndexError:
return []
def get_subscriptions(self, subscription_limit=200):
"""Retrieves a list of all the Subscriptions for this Person."""
return Subscription.get_by_person_record_id(
self.repo, self.record_id, limit=subscription_limit)
def get_linked_person_ids(self, note_limit=200):
"""Retrieves IDs of Persons marked as duplicates of this Person."""
return [note.linked_person_record_id
for note in self.get_notes(note_limit)
if note.linked_person_record_id]
def get_linked_persons(self, note_limit=200):
"""Retrieves Persons marked as duplicates of this Person."""
return Person.get_all(self.repo,
self.get_linked_person_ids(note_limit))
def get_all_linked_persons(self):
"""Retrieves all Persons transitively linked to this Person."""
linked_person_ids = set([self.record_id])
linked_persons = []
# Maintain a list of ids of duplicate persons that have not
# yet been processed.
new_person_ids = set(self.get_linked_person_ids())
# Iteratively process all new_person_ids by retrieving linked
# duplicates and storing those not yet processed.
# Processed ids are stored in the linked_person_ids set, and
# their corresponding records are in the linked_persons list.
while new_person_ids:
linked_person_ids.update(new_person_ids)
new_persons = Person.get_all(self.repo, list(new_person_ids))
for person in new_persons:
new_person_ids.update(person.get_linked_person_ids())
linked_persons += new_persons
new_person_ids -= linked_person_ids
return linked_persons
def get_associated_emails(self):
"""Gets a set of all the e-mail addresses to notify when this record
is changed."""
email_addresses = set([note.author_email for note in self.get_notes()
if note.author_email])
if self.author_email:
email_addresses.add(self.author_email)
return email_addresses
def get_effective_expiry_date(self):
"""Gets the expiry_date, or if no expiry_date is present, returns the
source_date plus the configurable default_expiration_days interval.
If there's no source_date, we use original_creation_date.
Returns:
A datetime date (not None).
"""
        import utils  # needed for get_utcnow() below; imported locally as elsewhere in this class
        if self.expiry_date:
return self.expiry_date
else:
expiration_days = config.get_for_repo(
self.repo, 'default_expiration_days') or (
DEFAULT_EXPIRATION_DAYS)
# in theory, we should always have original_creation_date, but since
# it was only added recently, we might have legacy
# records without it.
start_date = self.original_creation_date or utils.get_utcnow()
return start_date + timedelta(expiration_days)
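    # Worked example (hypothetical dates): with no expiry_date set, a record
    # whose original_creation_date is 2023-01-01 and the default 40-day window
    # gets an effective expiry date of 2023-02-10.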
def put_expiry_flags(self):
"""Updates the is_expired flags on this Person and related Notes to
make them consistent with the effective_expiry_date() on this Person,
and commits the changes to the datastore."""
import utils
now = utils.get_utcnow()
expired = self.get_effective_expiry_date() <= now
if self.is_expired != expired:
# NOTE: This should be the ONLY code that modifies is_expired.
self.is_expired = expired
# if we neglected to capture the original_creation_date,
# make a best effort to grab it now, for posterity.
if not self.original_creation_date:
self.original_creation_date = self.source_date
# If the record is expiring (being replaced with a placeholder,
# see http://zesty.ca/pfif/1.3/#data-expiry) or un-expiring (being
# restored from deletion), we want the source_date and entry_date
# updated so downstream clients will see this as the newest state.
self.source_date = now
self.entry_date = now
# All the Notes on the Person also expire or unexpire, to match.
notes = self.get_notes(filter_expired=False)
for note in notes:
note.is_expired = expired
# Store these changes in the datastore.
db.put(notes + [self])
# TODO(lschumacher): photos don't have expiration currently.
def wipe_contents(self):
"""Sets all the content fields to None (leaving timestamps and the
expiry flag untouched), stores the empty record, and permanently
deletes any related Notes and Photos. Call this method ONLY on records
that have already expired."""
# We rely on put_expiry_flags to have properly set the source_date,
# entry_date, and is_expired flags on Notes, as necessary.
assert self.is_expired
# Permanently delete all related Photos and Notes, but not self.
self.delete_related_entities()
was_changed = False
# TODO(nworden): consider adding a is_tombstone property or something
# like that, so we could just check that instead of checking each
# property individually every time.
for name, property in self.properties().items():
# Leave the repo, is_expired flag, and timestamps untouched.
if name not in ['repo', 'is_expired', 'original_creation_date',
'source_date', 'entry_date', 'expiry_date',
'last_modified']:
if name == 'photo':
# If we attempt to access this directly, Datastore will try
# to fetch the actual photo, which won't go well, because we
# just deleted the photo.
cur_value = Person.photo.get_value_for_datastore(self)
else:
cur_value = getattr(self, name)
if cur_value != property.default:
setattr(self, name, property.default)
was_changed = True
if was_changed:
self.put() # Store the empty placeholder record.
def delete_related_entities(self, delete_self=False):
"""Permanently delete all related Photos and Notes, and also self if
delete_self is True."""
# Delete all related Notes.
notes = self.get_notes(filter_expired=False)
# Delete the locally stored Photos. We use get_value_for_datastore to
# get just the keys and prevent auto-fetching the Photo data.
photo = Person.photo.get_value_for_datastore(self)
note_photos = [Note.photo.get_value_for_datastore(n) for n in notes]
entities_to_delete = filter(None, notes + [photo] + note_photos)
if delete_self:
entities_to_delete.append(self)
if config.get('enable_fulltext_search'):
full_text_search.delete_record_from_index(self)
db.delete(entities_to_delete)
def update_from_note(self, note):
"""Updates any necessary fields on the Person to reflect a new Note."""
# We want to transfer only the *non-empty, newer* values to the Person.
        # For a boolean, None means unspecified.
        if note.author_made_contact is not None:
# datetime stupidly refuses to compare to None, so check for None.
if (self.latest_found_source_date is None or
note.source_date >= self.latest_found_source_date):
self.latest_found = note.author_made_contact
self.latest_found_source_date = note.source_date
if note.status: # for string, '' means unspecified
if (self.latest_status_source_date is None or
note.source_date >= self.latest_status_source_date):
self.latest_status = note.status
self.latest_status_source_date = note.source_date
def update_index(self, which_indexing):
        # Set up new indexing.
if 'new' in which_indexing:
indexing.update_index_properties(self)
if config.get('enable_fulltext_search'):
full_text_search.add_record_to_index(self)
        # Set up old indexing.
if 'old' in which_indexing:
prefix.update_prefix_properties(self)
def update_latest_status(self, modified_note=None):
"""Scans all notes on this Person and fixes latest_status if needed."""
status = None
status_source_date = None
for note in self.get_notes():
if modified_note and modified_note.note_record_id == note.record_id:
note = modified_note
if note.status and not note.hidden:
status = note.status
status_source_date = note.source_date
if status != self.latest_status:
self.latest_status = status
self.latest_status_source_date = status_source_date
self.put()
def put_new(self):
"""Write the new person record to datastore. Increments person_counter
because a new record is created. Logs user actions is updated too.
We should never call this method against an existing record."""
db.put(self)
UsageCounter.increment_counter(self.repo, ['person'])
UserActionLog.put_new('add', self, copy_properties=False)
# Old indexing
# TODO(ryok): This is obsolete. Remove it.
prefix.add_prefix_properties(
Person, 'given_name', 'family_name', 'home_street', 'home_neighborhood',
'home_city', 'home_state', 'home_postal_code')
class Note(Base):
"""The datastore entity kind for storing a PFIF note record. Never call
Note() directly; use Note.create_clone() or Note.create_original()."""
# The entry_date should update every time a record is re-imported.
entry_date = db.DateTimeProperty(required=True)
person_record_id = db.StringProperty(required=True)
# Use this field to store the person_record_id of a duplicate Person entry.
linked_person_record_id = db.StringProperty(default='')
author_name = db.StringProperty(default='', multiline=True)
author_email = db.StringProperty(default='')
author_phone = db.StringProperty(default='')
# the original date we saw this record; it should not change.
original_creation_date = db.DateTimeProperty(auto_now_add=True)
# source_date is the date that the original repository last changed
# any of the fields in the pfif record.
source_date = db.DateTimeProperty()
status = db.StringProperty(default='', choices=pfif.NOTE_STATUS_VALUES)
author_made_contact = db.BooleanProperty()
email_of_found_person = db.StringProperty(default='')
phone_of_found_person = db.StringProperty(default='')
last_known_location = db.StringProperty(default='', multiline=True)
text = db.TextProperty(default='')
photo_url = db.TextProperty(default='')
# This reference points to a locally stored Photo entity. ONLY set this
# property when storing a new Photo object that is owned by this Note
# record and can be safely deleted when the Note is deleted.
photo = db.ReferenceProperty(default=None)
# True if the note has been marked as spam. Will cause the note to be
# initially hidden from display upon loading a record page.
hidden = db.BooleanProperty(default=False)
# True if the note has been reviewed for spam content at /admin/review.
reviewed = db.BooleanProperty(default=False)
def get_note_record_id(self):
return self.record_id
note_record_id = property(get_note_record_id)
@property
def photo_url_no_scheme(self):
import utils
return utils.strip_url_scheme(self.photo_url)
@staticmethod
def get_by_person_record_id(
repo, person_record_id, filter_expired=True):
"""Gets a list of all the Notes on a Person, ordered by source_date."""
return list(Note.generate_by_person_record_id(
repo, person_record_id, filter_expired))
@staticmethod
def generate_by_person_record_id(
repo, person_record_id, filter_expired=True):
"""Generates all the Notes on a Person record ordered by source_date."""
query = Note.all_in_repo(repo, filter_expired=filter_expired
).filter('person_record_id =', person_record_id
).order('source_date')
notes = query.fetch(Note.FETCH_LIMIT)
while notes:
for note in notes:
yield note
query.with_cursor(query.cursor()) # Continue where fetch left off.
notes = query.fetch(Note.FETCH_LIMIT)
@staticmethod
def get_unreviewed_notes_count(repo, filter_expired=True):
"""Gets the number of unreviewed notes."""
query = Note.all_in_repo(repo, filter_expired=filter_expired
).filter('reviewed =', False).filter('hidden =', False)
return query.count()
def put_new(self):
"""Write the new note to datastore. Increments note_counter because
a new note is created. Also, logs user actions is updated. We should
never call this method against an existing record."""
db.put(self)
UserActionLog.put_new('add', self, copy_properties=False)
note_status = self.status if self.status else 'unspecified'
UsageCounter.increment_counter(self.repo, ['note', note_status])
class NoteWithBadWords(Note):
# Spam score given by SpamDetector
spam_score = db.FloatProperty(default=0)
    # True if the note is confirmed by its author through email.
confirmed = db.BooleanProperty(default=False)
    # Once the note is confirmed, this field stores the record ID of the
    # copy of this note in the Note table. This is useful if we want to
    # delete notes with bad words even after they are confirmed.
confirmed_copy_id = db.StringProperty(default='')
class Photo(db.Model):
"""An uploaded image file. Key name: repo + ':' + photo_id."""
# Even though the repo is part of the key_name, it is also stored
# redundantly as a separate property so it can be indexed and queried upon.
repo = db.StringProperty(required=True)
image_data = db.BlobProperty() # sanitized, resized image in PNG format
upload_date = db.DateTimeProperty(auto_now_add=True)
# thumbnail image in PNG format
thumbnail_data = db.BlobProperty(default=None)
@staticmethod
def create(repo, **kwargs):
"""Creates a Photo entity with the given field values."""
id = UniqueId.create_id()
return Photo(key_name='%s:%s' % (repo, id), repo=repo, **kwargs)
@staticmethod
def get(repo, id):
return Photo.get_by_key_name('%s:%s' % (repo, id))
class Authorization(db.Model):
"""Authorization keys. Key name: repo + ':' + auth_key."""
DEFAULT_SETTINGS = dict(contact_name='', contact_email='',
organization_name='', domain_write_permission='',
read_permission=False, full_read_permission=False,
search_permission=True, subscribe_permission=False,
mark_notes_reviewed=False, is_valid=True, key='')
# Even though the repo is part of the key_name, it is also stored
# redundantly as a separate property so it can be indexed and queried upon.
repo = db.StringProperty(required=True)
def summary_str(self):
"""Generates a summary of the key's current state.
Meant for logging.
"""
permissions_list = []
for permission_field in ['read_permission',
'full_read_permission',
'search_permission',
'subscribe_permission',
'mark_notes_reviewed',
'believed_dead_permission',
'stats_permission']:
if getattr(self, permission_field):
permissions_list.append(permission_field)
permissions = '; '.join(permissions_list)
return ('repo: %(repo)s\n'
'write domain: %(write_domain)s\n'
'permissions: %(permissions)s\n'
'valid: %(valid)s\n'
'contact name: %(contact_name)s\n'
'contact email: %(contact_email)s\n'
'organization name: %(org_name)s') % {
'repo': self.repo,
'write_domain': self.domain_write_permission or 'None',
'permissions': permissions,
'valid': self.is_valid,
'contact_name': self.contact_name,
'contact_email': self.contact_email,
'org_name': self.organization_name,
}
# If this field is non-empty, this authorization token allows the client
# to write records with this original domain.
domain_write_permission = db.StringProperty()
# If this flag is true, this authorization token allows the client to read
# non-sensitive fields (i.e. filtered by utils.filter_sensitive_fields).
read_permission = db.BooleanProperty()
# If this flag is true, this authorization token allows the client to read
# all fields (i.e. not filtered by utils.filter_sensitive_fields).
full_read_permission = db.BooleanProperty()
# If this flag is true, this authorization token allows the client to use
# the search API and return non-sensitive fields (i.e. filtered
# by utils.filter_sensitive_fields).
search_permission = db.BooleanProperty()
# If this flag is true, this authorization token allows the client to use
# the API to subscribe any e-mail address to updates on any person.
subscribe_permission = db.BooleanProperty()
# If this flag is true, notes written with this authorization token are
# marked as "reviewed" and won't show up in admin's review list.
mark_notes_reviewed = db.BooleanProperty()
# If this flag is true, notes written with this authorization token are
# allowed to have status == 'believed_dead'.
believed_dead_permission = db.BooleanProperty()
# If this flag is true, this key can be used to get overall statistics.
stats_permission = db.BooleanProperty()
    # If this flag is False, API access with this key is not allowed.
is_valid = db.BooleanProperty(default=True)
# Bookkeeping information for humans, not used programmatically.
contact_name = db.StringProperty()
contact_email = db.StringProperty()
organization_name = db.StringProperty()
@property
def api_key(self):
"""Gets a key value excluding the repo part. """
if self.has_key():
return self.key().name().split(':')[1]
return None
@classmethod
def get(cls, repo, key):
"""Gets the Authorization entity for a given repository and key."""
return cls.get_by_key_name(repo + ':' + key)
@classmethod
def create(cls, repo, key, **kwargs):
"""Creates an Authorization entity for a given repository and key."""
return cls(key_name=repo + ':' + key, repo=repo, **kwargs)
class ApiKeyManagementLog(db.Model):
"""Log management history for API keys."""
CREATE = 'create'
UPDATE = 'update'
DELETE = 'delete'
ACTIONS = [CREATE, UPDATE, DELETE]
user = db.UserProperty(auto_current_user_add=True)
timestamp = db.DateTimeProperty(auto_now_add=True)
repo = db.StringProperty(required=True)
api_key = db.StringProperty(required=True)
action = db.StringProperty(required=True, choices=ACTIONS)
# The IP address of the admin making the change.
ip_address = db.StringProperty()
# A string representation of the state of the key after this action.
key_state = db.TextProperty()
@property
def authorization(self):
return Authorization.get(self.repo, self.api_key)
def encode_count_name(count_name):
"""Encode a name to printable ASCII characters so it can be safely
used as an attribute name for the datastore."""
encoded = []
append = encoded.append
for ch in map(ord, count_name):
if ch == 92:
append('\\\\')
elif 33 <= ch <= 126:
append(chr(ch))
else:
append('\\u%04x' % ch)
return ''.join(encoded)
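# Illustrative examples (hypothetical count names): printable ASCII passes
# through unchanged, backslashes are doubled, and any other character becomes
# a \uXXXX escape:
#   encode_count_name('person.status=believed_alive')
#       -> 'person.status=believed_alive'
#   encode_count_name(u'caf\xe9')  -> 'caf\\u00e9'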
class ApiActionLog(db.Model):
"""Log of api key usage."""
# actions
REPO = 'repo'
DELETE = 'delete'
READ = 'read'
SEARCH = 'search'
WRITE = 'write'
SUBSCRIBE = 'subscribe'
UNSUBSCRIBE = 'unsubscribe'
ACTIONS = [REPO, DELETE, READ, SEARCH, WRITE, SUBSCRIBE, UNSUBSCRIBE]
repo = db.StringProperty()
api_key = db.StringProperty()
action = db.StringProperty(required=True, choices=ACTIONS)
person_records = db.IntegerProperty()
note_records = db.IntegerProperty()
people_skipped = db.IntegerProperty() # write only
notes_skipped = db.IntegerProperty() # write only
user_agent = db.StringProperty()
ip_address = db.StringProperty() # client ip
request_url = db.StringProperty()
version = db.StringProperty() # pfif version.
timestamp = db.DateTimeProperty(auto_now=True)
@staticmethod
def record_action(repo, api_key, version, action, person_records,
note_records, people_skipped, notes_skipped, user_agent,
ip_address, request_url,
timestamp=None):
import utils
try:
ApiActionLog(repo=repo,
api_key=api_key,
action=action,
person_records=person_records,
note_records=note_records,
people_skipped=people_skipped,
notes_skipped=notes_skipped,
user_agent=user_agent,
ip_address=ip_address,
request_url=request_url,
version=version,
timestamp=timestamp or utils.get_utcnow()).put()
except Exception:
# swallow anything to prevent the main action from failing.
pass
class Counter(db.Expando):
"""Counters hold partial and completed results for ongoing counting tasks.
To see how this is used, check out tasks.py. A single Counter object can
contain several named accumulators. Typical usage is to scan for entities
in order by __key__, update the accumulators for each entity, and save the
partial counts when the time limit for a request is reached. The last
scanned key is saved in last_key so the next request can pick up the scan
where the last one left off. A non-empty last_key means a scan is not
finished; when a scan is done, last_key should be set to ''."""
timestamp = db.DateTimeProperty(auto_now=True)
scan_name = db.StringProperty()
repo = db.StringProperty()
last_key = db.StringProperty(default='') # if non-empty, count is partial
# Each Counter also has a dynamic property for each accumulator; all such
# properties are named "count_" followed by a count_name. The count_name
# is encoded to ensure all its characters are printable ASCII.
def get(self, count_name):
"""Gets the specified accumulator from this counter object."""
return getattr(self, 'count_' + encode_count_name(count_name), 0)
def increment(self, count_name):
"""Increments the given accumulator on this Counter object."""
prop_name = 'count_' + encode_count_name(count_name)
setattr(self, prop_name, getattr(self, prop_name, 0) + 1)
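    # Illustrative sketch (hypothetical counter names): accumulators live in
    # dynamic properties named 'count_' + encoded count_name. For a Counter
    # whose scan_name is 'note', for example,
    #   counter.increment('status=believed_alive')
    # bumps the dynamic property 'count_status=believed_alive', and
    #   Counter.get_count(repo, 'note.status=believed_alive')
    # reads back the total from the last completed scan.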
@classmethod
def get_count(cls, repo, name):
"""Gets the latest finished count for the given repository and name.
'name' should be in the format scan_name + '.' + count_name."""
scan_name, count_name = name.split('.')
count_name = encode_count_name(count_name)
return cls.get_all_counts(repo, scan_name).get(count_name, 0)
@classmethod
def get_all_counts(cls, repo, scan_name):
"""Gets a dictionary of all the counts for the last completed scan
for the given repository and scan name."""
counter_key = repo + ':' + scan_name
# Get the counts from memcache, loading from datastore if necessary.
counter_dict = memcache.get(counter_key)