Add separate image similarity flask server #88

Merged · 6 commits · Mar 27, 2019
116 changes: 59 additions & 57 deletions api/autoalbum.py
@@ -12,88 +12,83 @@

import ipdb

# from api.flags import \
# is_auto_albums_being_processed, \
# is_photos_being_added, \
# set_auto_album_processing_flag_on, \
# set_auto_album_processing_flag_off
from django_rq import job

from tqdm import tqdm
import rq
from api.util import logger
import pytz

@job
def regenerate_event_titles(user):
job_id = rq.get_current_job().id

# def is_auto_albums_being_processed():
# global FLAG_IS_AUTO_ALBUMS_BEING_PROCESSED
# return {"status":FLAG_IS_AUTO_ALBUMS_BEING_PROCESSED}

# # check if there are auto albums being generated right now
# if AlbumAuto.objects.count() > 0:
# last_album_auto_created_on = AlbumAuto.objects.order_by('-created_on')[0].created_on
# now = datetime.utcnow().replace(tzinfo=last_album_auto_created_on.tzinfo)
# td = (now-last_album_auto_created_on).total_seconds()
# if abs(td) < 10:
# status = True
# else:
# status = False
# else:
# status = False
# return {"status":status}

# go through all photos
if LongRunningJob.objects.filter(job_id=job_id).exists():
lrj = LongRunningJob.objects.get(job_id=job_id)
lrj.started_at = datetime.now().replace(tzinfo=pytz.utc)
lrj.save()
else:
lrj = LongRunningJob.objects.create(
started_by=user,
job_id=job_id,
queued_at=datetime.now().replace(tzinfo=pytz.utc),
started_at=datetime.now().replace(tzinfo=pytz.utc),
job_type=LongRunningJob.JOB_GENERATE_AUTO_ALBUM_TITLES)
lrj.save()


@job
def regenerate_event_titles(user):
lrj = LongRunningJob(
started_by=user,
job_id=rq.get_current_job().id,
started_at=datetime.now(),
job_type=LongRunningJob.JOB_GENERATE_AUTO_ALBUM_TITLES)
lrj.save()

try:

aus = AlbumAuto.objects.filter(owner=user).prefetch_related('photos')
for au in tqdm(aus):
target_count = len(aus)
for idx,au in enumerate(aus):
logger.info('job {}: {}'.format(job_id,idx))
au._autotitle()
au.save()

lrj.result = {
'progress': {
"current": idx + 1,
"target": target_count
}
}
lrj.save()

status = True
message = 'success'
res = {'status': status, 'message': message}

lrj = LongRunningJob.objects.get(job_id=rq.get_current_job().id)
lrj.finished = True
lrj.finished_at = datetime.now()
lrj.result = res
lrj.finished_at = datetime.now().replace(tzinfo=pytz.utc)
lrj.save()
logger.info('job {}: updated lrj entry to db'.format(job_id))

except:
status = False
res = {'status': status, 'message': 'failed'}

lrj = LongRunningJob.objects.get(job_id=rq.get_current_job().id)
lrj.failed = True
lrj.finished = True
lrj.finished_at = datetime.now()
lrj.finished_at = datetime.now().replace(tzinfo=pytz.utc)
lrj.save()

return True
return 1


@job
def generate_event_albums(user):
job_id = rq.get_current_job().id
lrj = LongRunningJob(
started_by=user,
job_id=job_id,
started_at=datetime.now(),
job_type=LongRunningJob.JOB_GENERATE_AUTO_ALBUMS)
lrj.save()

if LongRunningJob.objects.filter(job_id=job_id).exists():
lrj = LongRunningJob.objects.get(job_id=job_id)
lrj.started_at = datetime.now().replace(tzinfo=pytz.utc)
lrj.save()
else:
lrj = LongRunningJob.objects.create(
started_by=user,
job_id=job_id,
queued_at=datetime.now().replace(tzinfo=pytz.utc),
started_at=datetime.now().replace(tzinfo=pytz.utc),
job_type=LongRunningJob.JOB_GENERATE_AUTO_ALBUMS)
lrj.save()


try:
@@ -109,7 +104,7 @@ def group(photos_with_timestamp, dt=timedelta(hours=6)):
photos_with_timestamp = sorted(
photos_with_timestamp, key=lambda x: x[0])
groups = []
for photo in photos_with_timestamp:
for idx,photo in enumerate(photos_with_timestamp):
if len(groups) == 0:
groups.append([])
groups[-1].append(photo[1])
@@ -119,14 +114,18 @@ def group(photos_with_timestamp, dt=timedelta(hours=6)):
else:
groups.append([])
groups[-1].append(photo[1])
logger.info('job {}: {}'.format(job_id,idx))
return groups

groups = group(photos_with_timestamp, dt=timedelta(days=1, hours=12))
logger.info('job {}: made groups'.format(job_id))

album_locations = []

target_count = len(groups)

date_format = "%Y:%m:%d %H:%M:%S"
for group in groups:
for idx, group in enumerate(groups):
key = group[0].exif_timestamp
logger.info('job {}: processing auto album with date: '.format(job_id) + key.strftime(date_format))
items = group
@@ -153,24 +152,27 @@ def group(photos_with_timestamp, dt=timedelta(hours=6)):
album._autotitle()
album.save()
logger.info('job {}: generated auto album {}'.format(job_id,album.id))

lrj.result = {
'progress': {
"current": idx + 1,
"target": target_count
}
}
lrj.save()

status = True
message = 'success'
res = {'status': status, 'message': message}

lrj = LongRunningJob.objects.get(job_id=rq.get_current_job().id)
lrj.finished = True
lrj.finished_at = datetime.now()
lrj.result = res
lrj.finished_at = datetime.now().replace(tzinfo=pytz.utc)
lrj.save()

except:
status = False
res = {'status': status, 'message': 'failed'}

lrj = LongRunningJob.objects.get(job_id=rq.get_current_job().id)
lrj.failed = True
lrj.finished = True
lrj.finished_at = datetime.now()
lrj.finished_at = datetime.now().replace(tzinfo=pytz.utc)
lrj.save()

return 1
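
Both jobs in api/autoalbum.py now start with the same get-or-create step: if the enqueueing code already created a `LongRunningJob` row (recording `queued_at`), the worker only stamps `started_at`; otherwise the worker creates the row itself. A condensed sketch of that pattern as a standalone helper (the helper name is illustrative; the PR inlines this logic in each job):

```python
# Hypothetical helper; the PR repeats this logic inline in each job.
import pytz
from datetime import datetime
from api.models import LongRunningJob

def start_long_running_job(user, job_id, job_type):
    now = datetime.now().replace(tzinfo=pytz.utc)
    if LongRunningJob.objects.filter(job_id=job_id).exists():
        # The row was created when the job was enqueued; mark it started.
        lrj = LongRunningJob.objects.get(job_id=job_id)
        lrj.started_at = now
    else:
        # No pre-registered row exists; create one on the spot.
        lrj = LongRunningJob.objects.create(
            started_by=user,
            job_id=job_id,
            queued_at=now,
            started_at=now,
            job_type=job_type)
    lrj.save()
    return lrj
```

Each loop iteration then writes `{'progress': {'current': idx + 1, 'target': target_count}}` into `lrj.result` and saves, so callers can poll progress while the job runs.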
27 changes: 18 additions & 9 deletions api/directory_watcher.py
@@ -13,14 +13,14 @@
from config import image_dirs

import api.util as util
from api.image_similarity import build_image_similarity_index

import ipdb
from django_rq import job
import time
import numpy as np
import rq

from api.vector_bank import im2vec_bank

from django.db.models import Q
import json
@@ -141,7 +140,6 @@ def handle_new_image(user, image_path, job_id):

start = datetime.datetime.now()
photo._im2vec()
im2vec_bank.add_photo_to_index(photo)
elapsed = (datetime.datetime.now() - start).total_seconds()
elapsed_times['im2vec'] = elapsed
# util.logger.info('im2vec took %.2f' % elapsed)
@@ -165,12 +164,22 @@ def handle_new_image(user, image_path, job_id):
@job
def scan_photos(user):
job_id = rq.get_current_job().id
lrj = LongRunningJob(
started_by=user,
job_id=rq.get_current_job().id,
started_at=datetime.datetime.now().replace(tzinfo=pytz.utc),
job_type=LongRunningJob.JOB_SCAN_PHOTOS)
lrj.save()

if LongRunningJob.objects.filter(job_id=job_id).exists():
lrj = LongRunningJob.objects.get(job_id=job_id)
lrj.started_at = datetime.datetime.now().replace(tzinfo=pytz.utc)
lrj.save()
else:
lrj = LongRunningJob.objects.create(
started_by=user,
job_id=job_id,
queued_at=datetime.datetime.now().replace(tzinfo=pytz.utc),
started_at=datetime.datetime.now().replace(tzinfo=pytz.utc),
job_type=LongRunningJob.JOB_SCAN_PHOTOS)
lrj.save()




added_photo_count = 0
already_existing_photo = 0
@@ -214,6 +223,7 @@ def scan_photos(user):
'''

util.logger.info("Added {} photos".format(len(image_paths_to_add)))
build_image_similarity_index(user)

lrj = LongRunningJob.objects.get(job_id=rq.get_current_job().id)
lrj.finished = True
Expand All @@ -225,7 +235,6 @@ def scan_photos(user):
lrj.save()
except Exception as e:
util.logger.error(str(e))
util.logger.error(str(traceback.format_exc()))
lrj = LongRunningJob.objects.get(job_id=rq.get_current_job().id)
lrj.finished = True
lrj.failed = True
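
This file's change removes the per-photo `im2vec_bank.add_photo_to_index(photo)` call and instead rebuilds the whole similarity index once, after the scan loop, via `build_image_similarity_index(user)`. For context, a minimal sketch of how an `@job`-decorated function like `scan_photos` gets enqueued with django_rq (the caller shown here is illustrative; the actual view wiring is outside this diff):

```python
# Hypothetical caller; the enqueueing view is not part of this PR.
from api.directory_watcher import scan_photos

def trigger_scan(user):
    # django_rq's @job decorator adds .delay(), which enqueues the call
    # on an RQ queue instead of running it synchronously.
    rq_job = scan_photos.delay(user)
    # The worker reads this same id via rq.get_current_job().id and uses
    # it as LongRunningJob.job_id, tying the DB row to the queued job.
    return rq_job.id
```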
42 changes: 30 additions & 12 deletions api/face_classify.py
@@ -1,6 +1,7 @@
from api.models import Face
from api.models import Person
from api.models import LongRunningJob
from api.util import logger

import base64
import pickle
@@ -22,6 +23,7 @@
import seaborn as sns
from django_rq import job
import rq
import pytz

import datetime

@@ -68,12 +70,20 @@ def cluster_faces(user):

@job
def train_faces(user):
lrj = LongRunningJob(
started_by=user,
job_id=rq.get_current_job().id,
started_at=datetime.datetime.now(),
job_type=LongRunningJob.JOB_TRAIN_FACES)
lrj.save()
job_id = rq.get_current_job().id

if LongRunningJob.objects.filter(job_id=job_id).exists():
lrj = LongRunningJob.objects.get(job_id=job_id)
lrj.started_at = datetime.datetime.now().replace(tzinfo=pytz.utc)
lrj.save()
else:
lrj = LongRunningJob.objects.create(
started_by=user,
job_id=job_id,
queued_at=datetime.datetime.now().replace(tzinfo=pytz.utc),
started_at=datetime.datetime.now().replace(tzinfo=pytz.utc),
job_type=LongRunningJob.JOB_TRAIN_FACES)
lrj.save()

try:

@@ -137,30 +147,38 @@ def train_faces(user):
face_ids_unknown = [f['id'] for f in id2face_unknown.values()]
pred = clf.predict(face_encodings_unknown)
probs = np.max(clf.predict_proba(face_encodings_unknown), 1)
for face_id, person_name, probability in zip(face_ids_unknown, pred,
probs):

target_count = len(face_ids_unknown)

for idx, (face_id, person_name, probability) in enumerate(zip(face_ids_unknown, pred, probs)):
person = Person.objects.get(name=person_name)
face = Face.objects.get(id=face_id)
face.person = person
face.person_label_is_inferred = True
face.person_label_probability = probability
face.save()

lrj.result = {
'progress': {
"current": idx + 1,
"target": target_count
}
}
lrj.save()

# res = cluster_faces()
# print(res)

lrj = LongRunningJob.objects.get(job_id=rq.get_current_job().id)
lrj.finished = True
lrj.failed = False
lrj.finished_at = datetime.datetime.now()
lrj.result = {}
lrj.save()
return True

except:
except BaseException as e:
logger.error(str(e))
res = []

lrj = LongRunningJob.objects.get(job_id=rq.get_current_job().id)
lrj.failed = True
lrj.finished = True
lrj.finished_at = datetime.datetime.now()
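
The inference loop above consumes a classifier `clf` fitted earlier in `train_faces` on the user-labeled faces; that part of the function sits outside this hunk. A minimal sketch of the surrounding flow, assuming a scikit-learn estimator (the estimator choice and all names below are assumptions, not code from this PR):

```python
# Sketch only: the real train_faces builds its classifier before this hunk.
import numpy as np
from sklearn.neural_network import MLPClassifier

def fit_and_label(known_encodings, known_names, unknown_encodings):
    # Fit on face encodings whose person label was set by the user.
    clf = MLPClassifier(max_iter=1000).fit(known_encodings, known_names)
    # Predict a person per unknown face and keep the top-class probability,
    # mirroring clf.predict(...) and np.max(clf.predict_proba(...), 1) above.
    pred = clf.predict(unknown_encodings)
    probs = np.max(clf.predict_proba(unknown_encodings), 1)
    return list(zip(pred, probs))
```

The switch from a bare `except:` to `except BaseException as e` with `logger.error(str(e))` means training failures are now logged instead of silently marking the job failed.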
46 changes: 46 additions & 0 deletions api/image_similarity.py
@@ -0,0 +1,46 @@
from api.models import Photo, User
from api.util import logger
import requests
import numpy as np
from ownphotos.settings import IMAGE_SIMILARITY_SERVER

def search_similar_image(user,photo):
if type(user) == int:
user_id = user
else:
user_id = user.id

image_embedding = np.array(
np.frombuffer(bytes.fromhex(photo.encoding)), dtype=np.float32)
post_data = {
"user_id":user_id,
"image_embedding":image_embedding.tolist()
}
res = requests.post(IMAGE_SIMILARITY_SERVER+'/search/',json=post_data)
if res.status_code==200:
return res.json()
else:
logger.error('error retrieving similar photos to {} belonging to user {}'.format(photo.image_hash,user.username))
return []

def build_image_similarity_index(user):
logger.info('building similarity index for user {}'.format(user.username))
photos = Photo.objects.filter(owner=user).exclude(encoding=None).only('encoding')

image_hashes = []
image_embeddings = []

for photo in photos:
image_hashes.append(photo.image_hash)
image_embedding = np.array(
np.frombuffer(bytes.fromhex(photo.encoding)), dtype=np.float32)
image_embeddings.append(image_embedding.tolist())

post_data = {
"user_id":user.id,
"image_hashes":image_hashes,
"image_embeddings":image_embeddings
}
res = requests.post(IMAGE_SIMILARITY_SERVER+'/build/',json=post_data)
return res.json()

Empty file added api/management/__init__.py