Last active
December 14, 2017 13:45
-
-
Save acdha/3750774 to your computer and use it in GitHub Desktop.
Revisions
-
acdha revised this gist
Jan 3, 2014 . 1 changed file with 15 additions and 9 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,24 +1,25 @@ # encoding: utf-8 """Experimental Solr Grouping / Field Collapsing backend for Haystack 2.0""" # NOTE: You must be running the latest Pysolr master - no PyPI release yet! # See https://gist.github.com/3750774 for the current version of this code # See http://wiki.apache.org/solr/FieldCollapsing for the Solr feature documentation from __future__ import absolute_import import logging from django.db.models.loading import get_model from haystack.backends import EmptyResults from haystack.backends.solr_backend import SolrEngine, SolrSearchBackend, SolrSearchQuery from haystack.constants import DJANGO_CT, DJANGO_ID, ID from haystack.models import SearchResult from haystack.query import SearchQuerySet # Since there's no chance of this being portable (yet!) we'll import explicitly # rather than using the generic imports: class GroupedSearchQuery(SolrSearchQuery): def __init__(self, *args, **kwargs): super(GroupedSearchQuery, self).__init__(*args, **kwargs) self.grouping_field = None @@ -37,10 +38,10 @@ def post_process_facets(self, results): # See matches dance in _process_results below: total = 0 if 'hits' in results: total = int(results['hits']) elif 'matches' in results: total = int(results['matches']) self._total_document_count = total @@ -63,13 +64,14 @@ def build_params(self, *args, **kwargs): 'group.field': self.grouping_field, 'group.ngroups': 'true', 'group.limit': 2, # TODO: Don't hard-code this 'group.sort': 'django_ct desc, score desc', 'group.facet': 'true', 'result_class': GroupedSearchResult}) return res class GroupedSearchResult(object): def __init__(self, field_name, group_data, raw_results={}): self.field_name = field_name self.key = group_data['groupValue'] # TODO: convert _to_python @@ -117,6 +119,7 @@ def process_documents(self, doclist, raw_results): class GroupedSearchQuerySet(SearchQuerySet): def __init__(self, *args, **kwargs): super(GroupedSearchQuerySet, self).__init__(*args, **kwargs) @@ -148,12 +151,15 @@ def total_document_count(self): class GroupedSolrSearchBackend(SolrSearchBackend): def build_search_kwargs(self, *args, **kwargs): group_kwargs = [(i, kwargs.pop(i)) for i in kwargs.keys() if i.startswith("group")] res = super(GroupedSolrSearchBackend, self).build_search_kwargs(*args, **kwargs) res.update(group_kwargs) if group_kwargs and 'sort' not in kwargs: res['sort'] = 'score desc, item_id asc' return res -
acdha revised this gist
Mar 11, 2013 . 1 changed file with 43 additions and 13 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,6 +1,6 @@ # encoding: utf-8 """Experimental Solr Grouping / Field Collapsing backend for Haystack 2.0""" # NOTE: Requires pysolr 3.0.6+ # See https://gist.github.com/3750774 for the current version of this code # See http://wiki.apache.org/solr/FieldCollapsing for the Solr feature documentation from __future__ import absolute_import @@ -22,6 +22,7 @@ class GroupedSearchQuery(SolrSearchQuery): def __init__(self, *args, **kwargs): super(GroupedSearchQuery, self).__init__(*args, **kwargs) self.grouping_field = None self._total_document_count = None def _clone(self, **kwargs): clone = super(GroupedSearchQuery, self)._clone(**kwargs) @@ -31,6 +32,30 @@ def _clone(self, **kwargs): def add_group_by(self, field_name): self.grouping_field = field_name def post_process_facets(self, results): # FIXME: remove this hack once https://github.com/toastdriven/django-haystack/issues/750 lands # See matches dance in _process_results below: total = 0 if 'matches' in results: total = int(results['matches']) elif 'hits' in results: total = int(results['hits']) self._total_document_count = total return super(GroupedSearchQuery, self).post_process_facets(results) def get_total_document_count(self): """Return the total number of matching documents rather than document groups If the query has not been run, this will execute the query and store the results. """ if self._total_document_count is None: self.run() return self._total_document_count def build_params(self, *args, **kwargs): res = super(GroupedSearchQuery, self).build_params(*args, **kwargs) if self.grouping_field is not None: @@ -39,6 +64,7 @@ def build_params(self, *args, **kwargs): 'group.ngroups': 'true', 'group.limit': 2, # TODO: Don't hard-code this 'group.sort': 'score desc', 'group.facet': 'true', 'result_class': GroupedSearchResult}) return res @@ -85,16 +111,6 @@ def process_documents(self, doclist, raw_results): if raw_result[ID] in getattr(raw_results, 'highlighting', {}): additional_fields['highlighted'] = raw_results.highlighting[raw_result[ID]] result = SearchResult(app_label, model_name, raw_result[DJANGO_ID], raw_result['score'], **additional_fields) yield result @@ -117,6 +133,19 @@ def post_process_results(self, results): # Override the default model-specific processing return results def total_document_count(self): """Returns the count for the total number of matching documents rather than groups A GroupedSearchQuerySet normally returns the number of document groups; this allows you to indicate the total number of matching documents - quite handy for making facet counts match the displayed numbers """ if self.query.has_run(): return self.query.get_total_document_count() else: clone = self._clone() return clone.query.get_total_document_count() class GroupedSolrSearchBackend(SolrSearchBackend): def build_search_kwargs(self, *args, **kwargs): @@ -145,13 +174,14 @@ def _process_results(self, raw_results, result_class=None, **kwargs): if isinstance(raw_results, EmptyResults): return res assert len(raw_results.grouped) == 1, "Grouping on more than one field is not supported" res['results'] = results = [] for field_name, field_group in raw_results.grouped.items(): res['hits'] = field_group['ngroups'] res['matches'] = field_group['matches'] for group in field_group['groups']: if group['groupValue'] is None: logging.warning("Unexpected NULL grouping", extra={'data': raw_results}) res['hits'] -= 1 # Avoid confusing Haystack with excluded bogon results continue -
acdha revised this gist
Nov 21, 2012 . 1 changed file with 6 additions and 5 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -129,16 +129,16 @@ def build_search_kwargs(self, *args, **kwargs): return res def _process_results(self, raw_results, result_class=None, **kwargs): res = super(GroupedSolrSearchBackend, self)._process_results(raw_results, result_class=result_class, **kwargs) if result_class and not issubclass(result_class, GroupedSearchResult): return res if len(raw_results.docs): raise RuntimeError("Grouped Solr searches should return grouped elements, not docs!") assert not res['results'] assert not res['hits'] @@ -153,6 +153,7 @@ def _process_results(self, raw_results, result_class=None, **kwargs): for group in field_group['groups']: if group['groupValue'] == None: logging.warning("Unexpected NULL grouping", extra={'data': raw_results}) res['hits'] -= 1 # Avoid confusing Haystack with excluded bogon results continue results.append(result_class(field_name, group, raw_results=raw_results)) -
acdha revised this gist
Sep 27, 2012 . 1 changed file with 5 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -12,6 +12,7 @@ from haystack.constants import ID, DJANGO_CT, DJANGO_ID from haystack.models import SearchResult from haystack.query import SearchQuerySet from haystack.backends import EmptyResults # Since there's no chance of this being portable (yet!) we'll import explicitly # rather than using the generic imports: from haystack.backends.solr_backend import SolrEngine, SolrSearchBackend, SolrSearchQuery @@ -140,6 +141,10 @@ def _process_results(self, raw_results, result_class=None, **kwargs): assert not res['results'] assert not res['hits'] if isinstance(raw_results, EmptyResults): return res assert len(raw_results.grouped) == 1 res['results'] = results = [] -
acdha revised this gist
Sep 21, 2012 . 1 changed file with 9 additions and 10 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -8,9 +8,8 @@ import logging from django.db.models.loading import get_model from haystack.constants import ID, DJANGO_CT, DJANGO_ID from haystack.models import SearchResult from haystack.query import SearchQuerySet # Since there's no chance of this being portable (yet!) we'll import explicitly @@ -38,22 +37,23 @@ def build_params(self, *args, **kwargs): 'group.field': self.grouping_field, 'group.ngroups': 'true', 'group.limit': 2, # TODO: Don't hard-code this 'group.sort': 'score desc', 'result_class': GroupedSearchResult}) return res class GroupedSearchResult(object): def __init__(self, field_name, group_data, raw_results={}): self.field_name = field_name self.key = group_data['groupValue'] # TODO: convert _to_python self.hits = group_data['doclist']['numFound'] self.documents = list(self.process_documents(group_data['doclist']['docs'], raw_results=raw_results)) def __unicode__(self): return 'GroupedSearchResult({0.field_name}={0.group_key}, hits={0.hits})'.format(self) def process_documents(self, doclist, raw_results): # TODO: tame import spaghetti from haystack import connections engine = connections["en"] @@ -81,9 +81,8 @@ def process_documents(self, doclist): del(additional_fields[DJANGO_ID]) del(additional_fields['score']) if raw_result[ID] in getattr(raw_results, 'highlighting', {}): additional_fields['highlighted'] = raw_results.highlighting[raw_result[ID]] # TODO: restore distance_point processing if False: # distance_point: @@ -150,7 +149,7 @@ def _process_results(self, raw_results, result_class=None, **kwargs): if group['groupValue'] == None: logging.warning("Unexpected NULL grouping", extra={'data': raw_results}) continue results.append(result_class(field_name, group, raw_results=raw_results)) return res -
acdha revised this gist
Sep 21, 2012 . 1 changed file with 15 additions and 19 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -36,32 +36,24 @@ def build_params(self, *args, **kwargs): if self.grouping_field is not None: res.update({'group': 'true', 'group.field': self.grouping_field, 'group.ngroups': 'true', 'group.limit': 2, # TODO: Don't hard-code this 'group.sort': 'django_ct asc', 'result_class': GroupedSearchResult}) return res class GroupedSearchResult(object): def __init__(self, field_name, group_data): self.field_name = field_name self.key = group_data['groupValue'] # TODO: convert _to_python self.hits = group_data['doclist']['numFound'] self.documents = list(self.process_documents(group_data['doclist']['docs'])) def __unicode__(self): return 'GroupedSearchResult({0.field_name}={0.group_key}, hits={0.hits})'.format(self) def process_documents(self, doclist): # TODO: tame import spaghetti from haystack import connections engine = connections["en"] @@ -149,12 +141,16 @@ def _process_results(self, raw_results, result_class=None, **kwargs): assert not res['results'] assert not res['hits'] assert len(raw_results.grouped) == 1 res['results'] = results = [] for field_name, field_group in raw_results.grouped.items(): res['hits'] = field_group['ngroups'] for group in field_group['groups']: if group['groupValue'] == None: logging.warning("Unexpected NULL grouping", extra={'data': raw_results}) continue results.append(result_class(field_name, group)) return res -
acdha revised this gist
Sep 19, 2012 . 1 changed file with 6 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,8 @@ # encoding: utf-8 """Experimental Solr Grouping / Field Collapsing backend for Haystack 2.0""" # NOTE: You must be running the latest Pysolr master - no PyPI release yet! # See https://gist.github.com/3750774 for the current version of this code # See http://wiki.apache.org/solr/FieldCollapsing for the Solr feature documentation from __future__ import absolute_import import logging @@ -33,7 +36,7 @@ def build_params(self, *args, **kwargs): if self.grouping_field is not None: res.update({'group': 'true', 'group.field': self.grouping_field, 'group.limit': 3, # TODO: This should not be hard-coded 'group.ngroups': 'true', 'group.sort': 'django_ct desc', 'result_class': GroupedSearchResult}) @@ -119,6 +122,7 @@ def group_by(self, field_name): return clone def post_process_results(self, results): # Override the default model-specific processing return results -
acdha created this gist
Sep 19, 2012 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,160 @@ # encoding: utf-8 # See http://wiki.apache.org/solr/FieldCollapsing for the Solr feature information from __future__ import absolute_import import logging from django.db.models.loading import get_model from django.utils.datastructures import SortedDict from haystack.constants import DJANGO_CT, DJANGO_ID from haystack.models import SearchResult from haystack.query import SearchQuerySet # Since there's no chance of this being portable (yet!) we'll import explicitly # rather than using the generic imports: from haystack.backends.solr_backend import SolrEngine, SolrSearchBackend, SolrSearchQuery class GroupedSearchQuery(SolrSearchQuery): def __init__(self, *args, **kwargs): super(GroupedSearchQuery, self).__init__(*args, **kwargs) self.grouping_field = None def _clone(self, **kwargs): clone = super(GroupedSearchQuery, self)._clone(**kwargs) clone.grouping_field = self.grouping_field return clone def add_group_by(self, field_name): self.grouping_field = field_name def build_params(self, *args, **kwargs): res = super(GroupedSearchQuery, self).build_params(*args, **kwargs) if self.grouping_field is not None: res.update({'group': 'true', 'group.field': self.grouping_field, 'group.limit': 3, 'group.ngroups': 'true', 'group.sort': 'django_ct desc', 'result_class': GroupedSearchResult}) return res class GroupedSearchResult(object): def __init__(self, field_name, group): self.field_name = field_name self.count = group['ngroups'] self.total_documents = group['matches'] self.groups = SortedDict() for group in group['groups']: key = group['groupValue'] if key is None: logging.warning("Skipping empty key value") continue self.groups[key] = list(self.process_documents(group['doclist']['docs'])) def __unicode__(self): return 'GroupedSearchResult("{0.field_name}", count={0.count})'.format(self) def process_documents(self, doclist): # TODO: tame import spaghetti from haystack import connections engine = connections["en"] conn = engine.get_backend().conn unified_index = engine.get_unified_index() indexed_models = unified_index.get_indexed_models() for raw_result in doclist: app_label, model_name = raw_result[DJANGO_CT].split('.') additional_fields = {} model = get_model(app_label, model_name) if model and model in indexed_models: for key, value in raw_result.items(): index = unified_index.get_index(model) string_key = str(key) if string_key in index.fields and hasattr(index.fields[string_key], 'convert'): additional_fields[string_key] = index.fields[string_key].convert(value) else: additional_fields[string_key] = conn._to_python(value) del(additional_fields[DJANGO_CT]) del(additional_fields[DJANGO_ID]) del(additional_fields['score']) # TODO: Add highlighting visibility # if raw_result[ID] in getattr(raw_results, 'highlighting', {}): # additional_fields['highlighted'] = raw_results.highlighting[raw_result[ID]] # TODO: restore distance_point processing if False: # distance_point: additional_fields['_point_of_origin'] = distance_point if raw_result.get('__dist__'): from haystack.utils.geo import Distance additional_fields['_distance'] = Distance(km=float(raw_result['__dist__'])) else: additional_fields['_distance'] = None result = SearchResult(app_label, model_name, raw_result[DJANGO_ID], raw_result['score'], **additional_fields) yield result class GroupedSearchQuerySet(SearchQuerySet): def __init__(self, *args, **kwargs): super(GroupedSearchQuerySet, self).__init__(*args, **kwargs) if not isinstance(self.query, GroupedSearchQuery): raise TypeError("GroupedSearchQuerySet must be used with a GroupedSearchQuery query") def group_by(self, field_name): """Have Solr group results based on the provided field name""" clone = self._clone() clone.query.add_group_by(field_name) return clone def post_process_results(self, results): return results class GroupedSolrSearchBackend(SolrSearchBackend): def build_search_kwargs(self, *args, **kwargs): group_kwargs = [(i, kwargs.pop(i)) for i in kwargs.keys() if i.startswith("group")] res = super(GroupedSolrSearchBackend, self).build_search_kwargs(*args, **kwargs) res.update(group_kwargs) return res def _process_results(self, raw_results, result_class=None, **kwargs): if result_class and not issubclass(result_class, GroupedSearchResult): raise TypeError("GroupedSolrSearchBackend requires use of the GroupedSearchResult result_class") if len(raw_results.docs): raise RuntimeError("Grouped Solr searches should return grouped elements, not docs!") res = super(GroupedSolrSearchBackend, self)._process_results(raw_results, result_class=result_class, **kwargs) assert not res['results'] assert not res['hits'] res['results'] = results = [] for field_name, groups in raw_results.grouped.items(): results.append(result_class(field_name, groups)) res['hits'] = len(results) return res class GroupedSolrEngine(SolrEngine): backend = GroupedSolrSearchBackend query = GroupedSearchQuery