# encoding: utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
import datetime
import re
import warnings
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from django.utils import six
import haystack
from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, log_query
from haystack.constants import DEFAULT_OPERATOR, DJANGO_CT, DJANGO_ID, ID
from haystack.exceptions import MissingDependency, MoreLikeThisError, SkipDocument
from haystack.inputs import Clean, Exact, PythonData, Raw
from haystack.models import SearchResult
from haystack.utils import log as logging
from haystack.utils import get_identifier, get_model_ct
from haystack.utils.app_loading import haystack_get_model
try:
import elasticsearch
from elasticsearch.helpers import bulk_index
from elasticsearch.exceptions import NotFoundError
except ImportError:
raise MissingDependency("The 'elasticsearch' backend requires the installation of 'elasticsearch'. Please refer to the documentation.")
DATETIME_REGEX = re.compile(
r'^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})T'
r'(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(\.\d+)?$')
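# For illustration: this pattern matches ISO-8601 timestamps such as
# '2009-05-09T16:14:00' or '2009-05-09T16:14:00.123456'; any fractional
# seconds are matched but discarded when the value is converted back to a
# ``datetime`` in ``_to_python`` below.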
class ElasticsearchSearchBackend(BaseSearchBackend):
    # Words reserved by Elasticsearch for special use.
RESERVED_WORDS = (
'AND',
'NOT',
'OR',
'TO',
)
# Characters reserved by Elasticsearch for special use.
# The '\\' must come first, so as not to overwrite the other slash replacements.
RESERVED_CHARACTERS = (
'\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}',
'[', ']', '^', '"', '~', '*', '?', ':', '/',
)
    # Settings to add n-gram & edge n-gram analyzers.
DEFAULT_SETTINGS = {
'settings': {
"analysis": {
"analyzer": {
"ngram_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": ["haystack_ngram", "lowercase"]
},
"edgengram_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": ["haystack_edgengram", "lowercase"]
}
},
"tokenizer": {
"haystack_ngram_tokenizer": {
"type": "nGram",
"min_gram": 3,
"max_gram": 15,
},
"haystack_edgengram_tokenizer": {
"type": "edgeNGram",
"min_gram": 2,
"max_gram": 15,
"side": "front"
}
},
"filter": {
"haystack_ngram": {
"type": "nGram",
"min_gram": 3,
"max_gram": 15
},
"haystack_edgengram": {
"type": "edgeNGram",
"min_gram": 2,
"max_gram": 15
}
}
}
}
}
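    # A subclass may override ``DEFAULT_SETTINGS`` to tune the analysis chain.
    # A minimal sketch (``CustomElasticsearchBackend`` is a hypothetical name;
    # keep the analyzer names referenced by ``FIELD_MAPPINGS`` below):
    #
    #   class CustomElasticsearchBackend(ElasticsearchSearchBackend):
    #       DEFAULT_SETTINGS = {
    #           'settings': {
    #               'analysis': {
    #                   'analyzer': {
    #                       'ngram_analyzer': {'type': 'custom', 'tokenizer': 'lowercase',
    #                                          'filter': ['haystack_ngram']},
    #                       'edgengram_analyzer': {'type': 'custom', 'tokenizer': 'lowercase',
    #                                              'filter': ['haystack_edgengram']},
    #                   },
    #                   'filter': {
    #                       'haystack_ngram': {'type': 'nGram', 'min_gram': 4, 'max_gram': 10},
    #                       'haystack_edgengram': {'type': 'edgeNGram', 'min_gram': 1, 'max_gram': 10},
    #                   },
    #               },
    #           },
    #       }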
def __init__(self, connection_alias, **connection_options):
super(ElasticsearchSearchBackend, self).__init__(connection_alias, **connection_options)
        if 'URL' not in connection_options:
            raise ImproperlyConfigured("You must specify a 'URL' in your settings for connection '%s'." % connection_alias)
        if 'INDEX_NAME' not in connection_options:
            raise ImproperlyConfigured("You must specify an 'INDEX_NAME' in your settings for connection '%s'." % connection_alias)
self.conn = elasticsearch.Elasticsearch(connection_options['URL'], timeout=self.timeout, **connection_options.get('KWARGS', {}))
self.index_name = connection_options['INDEX_NAME']
self.log = logging.getLogger('haystack')
self.setup_complete = False
self.existing_mapping = {}
def setup(self):
"""
Defers loading until needed.
"""
# Get the existing mapping & cache it. We'll compare it
# during the ``update`` & if it doesn't match, we'll put the new
# mapping.
try:
self.existing_mapping = self.conn.indices.get_mapping(index=self.index_name)
except NotFoundError:
pass
except Exception:
if not self.silently_fail:
raise
unified_index = haystack.connections[self.connection_alias].get_unified_index()
self.content_field_name, field_mapping = self.build_schema(unified_index.all_searchfields())
current_mapping = {
'modelresult': {
'properties': field_mapping,
'_boost': {
'name': 'boost',
'null_value': 1.0
}
}
}
if current_mapping != self.existing_mapping:
try:
# Make sure the index is there first.
self.conn.indices.create(index=self.index_name, body=self.DEFAULT_SETTINGS, ignore=400)
self.conn.indices.put_mapping(index=self.index_name, doc_type='modelresult', body=current_mapping)
self.existing_mapping = current_mapping
except Exception:
if not self.silently_fail:
raise
self.setup_complete = True
def update(self, index, iterable, commit=True):
if not self.setup_complete:
try:
self.setup()
except elasticsearch.TransportError as e:
if not self.silently_fail:
raise
self.log.error("Failed to add documents to Elasticsearch: %s", e)
return
prepped_docs = []
for obj in iterable:
try:
prepped_data = index.full_prepare(obj)
final_data = {}
# Convert the data to make sure it's happy.
for key, value in prepped_data.items():
final_data[key] = self._from_python(value)
final_data['_id'] = final_data[ID]
prepped_docs.append(final_data)
except SkipDocument:
self.log.debug(u"Indexing for object `%s` skipped", obj)
except elasticsearch.TransportError as e:
if not self.silently_fail:
raise
# We'll log the object identifier but won't include the actual object
# to avoid the possibility of that generating encoding errors while
# processing the log message:
self.log.error(u"%s while preparing object for update" % e.__class__.__name__, exc_info=True, extra={
"data": {
"index": index,
"object": get_identifier(obj)
}
})
bulk_index(self.conn, prepped_docs, index=self.index_name, doc_type='modelresult')
if commit:
self.conn.indices.refresh(index=self.index_name)
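    # Illustrative use (normally driven by ``manage.py update_index`` or a
    # signal processor rather than called directly); ``NoteIndex`` and ``Note``
    # are hypothetical names used only for this sketch:
    #
    #   backend = haystack.connections['default'].get_backend()
    #   backend.update(NoteIndex(), Note.objects.all())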
def remove(self, obj_or_string, commit=True):
doc_id = get_identifier(obj_or_string)
if not self.setup_complete:
try:
self.setup()
except elasticsearch.TransportError as e:
if not self.silently_fail:
raise
self.log.error("Failed to remove document '%s' from Elasticsearch: %s", doc_id, e)
return
try:
self.conn.delete(index=self.index_name, doc_type='modelresult', id=doc_id, ignore=404)
if commit:
self.conn.indices.refresh(index=self.index_name)
except elasticsearch.TransportError as e:
if not self.silently_fail:
raise
self.log.error("Failed to remove document '%s' from Elasticsearch: %s", doc_id, e)
def clear(self, models=[], commit=True):
# We actually don't want to do this here, as mappings could be
# very different.
# if not self.setup_complete:
# self.setup()
try:
if not models:
self.conn.indices.delete(index=self.index_name, ignore=404)
self.setup_complete = False
self.existing_mapping = {}
else:
models_to_delete = []
for model in models:
models_to_delete.append("%s:%s" % (DJANGO_CT, get_model_ct(model)))
                # Delete by query in Elasticsearch assumes you're dealing with
# a ``query`` root object. :/
query = {'query': {'query_string': {'query': " OR ".join(models_to_delete)}}}
self.conn.delete_by_query(index=self.index_name, doc_type='modelresult', body=query)
except elasticsearch.TransportError as e:
if not self.silently_fail:
raise
if len(models):
self.log.error("Failed to clear Elasticsearch index of models '%s': %s", ','.join(models_to_delete), e)
else:
self.log.error("Failed to clear Elasticsearch index: %s", e)
def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
fields='', highlight=False, facets=None,
date_facets=None, query_facets=None,
narrow_queries=None, spelling_query=None,
within=None, dwithin=None, distance_point=None,
models=None, limit_to_registered_models=None,
result_class=None):
index = haystack.connections[self.connection_alias].get_unified_index()
content_field = index.document_field
if query_string == '*:*':
kwargs = {
'query': {
"match_all": {}
},
}
else:
kwargs = {
'query': {
'query_string': {
'default_field': content_field,
'default_operator': DEFAULT_OPERATOR,
'query': query_string,
'analyze_wildcard': True,
'auto_generate_phrase_queries': True,
},
},
}
# so far, no filters
filters = []
if fields:
if isinstance(fields, (list, set)):
fields = " ".join(fields)
kwargs['fields'] = fields
if sort_by is not None:
order_list = []
for field, direction in sort_by:
if field == 'distance' and distance_point:
# Do the geo-enabled sort.
lng, lat = distance_point['point'].get_coords()
sort_kwargs = {
"_geo_distance": {
distance_point['field']: [lng, lat],
"order": direction,
"unit": "km"
}
}
else:
if field == 'distance':
warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")
# Regular sorting.
sort_kwargs = {field: {'order': direction}}
order_list.append(sort_kwargs)
kwargs['sort'] = order_list
# From/size offsets don't seem to work right in Elasticsearch's DSL. :/
# if start_offset is not None:
# kwargs['from'] = start_offset
# if end_offset is not None:
# kwargs['size'] = end_offset - start_offset
if highlight is True:
kwargs['highlight'] = {
'fields': {
content_field: {'store': 'yes'},
}
}
if self.include_spelling:
kwargs['suggest'] = {
'suggest': {
'text': spelling_query or query_string,
'term': {
# Using content_field here will result in suggestions of stemmed words.
'field': '_all',
},
},
}
if narrow_queries is None:
narrow_queries = set()
if facets is not None:
kwargs.setdefault('facets', {})
for facet_fieldname, extra_options in facets.items():
facet_options = {
'terms': {
'field': facet_fieldname,
'size': 100,
},
}
# Special cases for options applied at the facet level (not the terms level).
if extra_options.pop('global_scope', False):
                    # Renamed "global_scope" since "global" is a Python keyword.
facet_options['global'] = True
if 'facet_filter' in extra_options:
facet_options['facet_filter'] = extra_options.pop('facet_filter')
facet_options['terms'].update(extra_options)
kwargs['facets'][facet_fieldname] = facet_options
if date_facets is not None:
kwargs.setdefault('facets', {})
for facet_fieldname, value in date_facets.items():
                # Inspect ``gap_by`` & only add the amount if it's more than one.
                interval = value.get('gap_by').lower()
                # The gap amount can't be applied to months or years.
                if value.get('gap_amount', 1) != 1 and interval not in ('month', 'year'):
                    # Only the first character of the interval name is valid here.
                    interval = "%s%s" % (value['gap_amount'], interval[:1])
kwargs['facets'][facet_fieldname] = {
'date_histogram': {
'field': facet_fieldname,
'interval': interval,
},
'facet_filter': {
"range": {
facet_fieldname: {
'from': self._from_python(value.get('start_date')),
'to': self._from_python(value.get('end_date')),
}
}
}
}
if query_facets is not None:
kwargs.setdefault('facets', {})
for facet_fieldname, value in query_facets:
kwargs['facets'][facet_fieldname] = {
'query': {
'query_string': {
'query': value,
}
},
}
if limit_to_registered_models is None:
limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)
if models and len(models):
model_choices = sorted(get_model_ct(model) for model in models)
elif limit_to_registered_models:
# Using narrow queries, limit the results to only models handled
# with the current routers.
model_choices = self.build_models_list()
else:
model_choices = []
if len(model_choices) > 0:
filters.append({"terms": {DJANGO_CT: model_choices}})
for q in narrow_queries:
filters.append({
'fquery': {
'query': {
'query_string': {
'query': q
},
},
'_cache': True,
}
})
if within is not None:
from haystack.utils.geo import generate_bounding_box
((south, west), (north, east)) = generate_bounding_box(within['point_1'], within['point_2'])
within_filter = {
"geo_bounding_box": {
within['field']: {
"top_left": {
"lat": north,
"lon": west
},
"bottom_right": {
"lat": south,
"lon": east
}
}
},
}
filters.append(within_filter)
if dwithin is not None:
lng, lat = dwithin['point'].get_coords()
            # NB: the 1.0.0 release of elasticsearch introduced an
            # incompatible change in the distance filter formatting.
if elasticsearch.VERSION >= (1, 0, 0):
distance = "%(dist).6f%(unit)s" % {
'dist': dwithin['distance'].km,
'unit': "km"
}
else:
distance = dwithin['distance'].km
dwithin_filter = {
"geo_distance": {
"distance": distance,
dwithin['field']: {
"lat": lat,
"lon": lng
}
}
}
filters.append(dwithin_filter)
        # If there are filters, wrap the query in a ``filtered`` query.
if filters:
kwargs["query"] = {"filtered": {"query": kwargs.pop("query")}}
if len(filters) == 1:
kwargs['query']['filtered']["filter"] = filters[0]
else:
kwargs['query']['filtered']["filter"] = {"bool": {"must": filters}}
return kwargs
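    # For a rough idea of the output, a call such as
    # ``build_search_kwargs('(title:(django))', models=[Note])`` (where ``Note``
    # is a hypothetical model whose document field is named ``text``) would
    # produce a body along the lines of:
    #
    #   {'query': {'filtered': {
    #       'query': {'query_string': {'default_field': 'text',
    #                                  'default_operator': 'AND',
    #                                  'query': '(title:(django))',
    #                                  'analyze_wildcard': True,
    #                                  'auto_generate_phrase_queries': True}},
    #       'filter': {'terms': {'django_ct': ['myapp.note']}}}}}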
@log_query
def search(self, query_string, **kwargs):
if len(query_string) == 0:
return {
'results': [],
'hits': 0,
}
if not self.setup_complete:
self.setup()
search_kwargs = self.build_search_kwargs(query_string, **kwargs)
search_kwargs['from'] = kwargs.get('start_offset', 0)
order_fields = set()
for order in search_kwargs.get('sort', []):
for key in order.keys():
order_fields.add(key)
geo_sort = '_geo_distance' in order_fields
end_offset = kwargs.get('end_offset')
start_offset = kwargs.get('start_offset', 0)
if end_offset is not None and end_offset > start_offset:
search_kwargs['size'] = end_offset - start_offset
try:
raw_results = self.conn.search(body=search_kwargs,
index=self.index_name,
doc_type='modelresult',
_source=True)
except elasticsearch.TransportError as e:
if not self.silently_fail:
raise
self.log.error("Failed to query Elasticsearch using '%s': %s", query_string, e)
raw_results = {}
return self._process_results(raw_results,
highlight=kwargs.get('highlight'),
result_class=kwargs.get('result_class', SearchResult),
distance_point=kwargs.get('distance_point'),
geo_sort=geo_sort)
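    # Illustrative direct call on a backend instance (``SearchQuerySet`` is the
    # usual entry point); the hit count shown is made up for the sketch:
    #
    #   backend.search('(title:(django))', end_offset=10)
    #   # -> {'results': [...], 'hits': 3, 'facets': {}, 'spelling_suggestion': None}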
def more_like_this(self, model_instance, additional_query_string=None,
start_offset=0, end_offset=None, models=None,
limit_to_registered_models=None, result_class=None, **kwargs):
from haystack import connections
if not self.setup_complete:
self.setup()
# Deferred models will have a different class ("RealClass_Deferred_fieldname")
# which won't be in our registry:
model_klass = model_instance._meta.concrete_model
index = connections[self.connection_alias].get_unified_index().get_index(model_klass)
field_name = index.get_content_field()
params = {}
if start_offset is not None:
params['search_from'] = start_offset
if end_offset is not None:
params['search_size'] = end_offset - start_offset
doc_id = get_identifier(model_instance)
try:
raw_results = self.conn.mlt(index=self.index_name, doc_type='modelresult', id=doc_id, mlt_fields=[field_name], **params)
except elasticsearch.TransportError as e:
if not self.silently_fail:
raise
self.log.error("Failed to fetch More Like This from Elasticsearch for document '%s': %s", doc_id, e)
raw_results = {}
return self._process_results(raw_results, result_class=result_class)
def _process_results(self, raw_results, highlight=False,
result_class=None, distance_point=None,
geo_sort=False):
from haystack import connections
results = []
hits = raw_results.get('hits', {}).get('total', 0)
facets = {}
spelling_suggestion = None
if result_class is None:
result_class = SearchResult
if self.include_spelling and 'suggest' in raw_results:
raw_suggest = raw_results['suggest'].get('suggest')
if raw_suggest:
spelling_suggestion = ' '.join([word['text'] if len(word['options']) == 0 else word['options'][0]['text'] for word in raw_suggest])
if 'facets' in raw_results:
facets = {
'fields': {},
'dates': {},
'queries': {},
}
for facet_fieldname, facet_info in raw_results['facets'].items():
if facet_info.get('_type', 'terms') == 'terms':
facets['fields'][facet_fieldname] = [(individual['term'], individual['count']) for individual in facet_info['terms']]
elif facet_info.get('_type', 'terms') == 'date_histogram':
# Elasticsearch provides UTC timestamps with an extra three
# decimals of precision, which datetime barfs on.
facets['dates'][facet_fieldname] = [(datetime.datetime.utcfromtimestamp(individual['time'] / 1000), individual['count']) for individual in facet_info['entries']]
elif facet_info.get('_type', 'terms') == 'query':
facets['queries'][facet_fieldname] = facet_info['count']
unified_index = connections[self.connection_alias].get_unified_index()
indexed_models = unified_index.get_indexed_models()
content_field = unified_index.document_field
for raw_result in raw_results.get('hits', {}).get('hits', []):
source = raw_result['_source']
app_label, model_name = source[DJANGO_CT].split('.')
additional_fields = {}
model = haystack_get_model(app_label, model_name)
if model and model in indexed_models:
for key, value in source.items():
index = unified_index.get_index(model)
string_key = str(key)
if string_key in index.fields and hasattr(index.fields[string_key], 'convert'):
additional_fields[string_key] = index.fields[string_key].convert(value)
else:
additional_fields[string_key] = self._to_python(value)
del(additional_fields[DJANGO_CT])
del(additional_fields[DJANGO_ID])
if 'highlight' in raw_result:
additional_fields['highlighted'] = raw_result['highlight'].get(content_field, '')
if distance_point:
additional_fields['_point_of_origin'] = distance_point
if geo_sort and raw_result.get('sort'):
from haystack.utils.geo import Distance
additional_fields['_distance'] = Distance(km=float(raw_result['sort'][0]))
else:
additional_fields['_distance'] = None
result = result_class(app_label, model_name, source[DJANGO_ID], raw_result['_score'], **additional_fields)
results.append(result)
else:
hits -= 1
return {
'results': results,
'hits': hits,
'facets': facets,
'spelling_suggestion': spelling_suggestion,
}
def build_schema(self, fields):
content_field_name = ''
mapping = {
DJANGO_CT: {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False},
DJANGO_ID: {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False},
}
for field_name, field_class in fields.items():
field_mapping = FIELD_MAPPINGS.get(field_class.field_type, DEFAULT_FIELD_MAPPING).copy()
if field_class.boost != 1.0:
field_mapping['boost'] = field_class.boost
if field_class.document is True:
content_field_name = field_class.index_fieldname
# Do this last to override `text` fields.
if field_mapping['type'] == 'string':
if field_class.indexed is False or hasattr(field_class, 'facet_for'):
field_mapping['index'] = 'not_analyzed'
del field_mapping['analyzer']
mapping[field_class.index_fieldname] = field_mapping
return (content_field_name, mapping)
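    # For illustration, an index with hypothetical ``text`` (document field),
    # ``title`` and ``pub_date`` fields would yield a mapping resembling:
    #
    #   {'django_ct': {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False},
    #    'django_id': {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False},
    #    'text': {'type': 'string', 'analyzer': 'snowball'},
    #    'title': {'type': 'string', 'analyzer': 'snowball'},
    #    'pub_date': {'type': 'date'}}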
def _iso_datetime(self, value):
"""
If value appears to be something datetime-like, return it in ISO format.
Otherwise, return None.
"""
if hasattr(value, 'strftime'):
if hasattr(value, 'hour'):
return value.isoformat()
else:
return '%sT00:00:00' % value.isoformat()
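    # Examples (illustrative):
    #   _iso_datetime(datetime.date(2009, 5, 9))             -> '2009-05-09T00:00:00'
    #   _iso_datetime(datetime.datetime(2009, 5, 9, 16, 14)) -> '2009-05-09T16:14:00'
    #   _iso_datetime('not a date')                           -> None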
def _from_python(self, value):
"""Convert more Python data types to ES-understandable JSON."""
iso = self._iso_datetime(value)
if iso:
return iso
elif isinstance(value, six.binary_type):
# TODO: Be stricter.
return six.text_type(value, errors='replace')
elif isinstance(value, set):
return list(value)
return value
    def _to_python(self, value):
        """Convert values from Elasticsearch to native Python values."""
if isinstance(value, (int, float, complex, list, tuple, bool)):
return value
if isinstance(value, six.string_types):
possible_datetime = DATETIME_REGEX.search(value)
if possible_datetime:
date_values = possible_datetime.groupdict()
for dk, dv in date_values.items():
date_values[dk] = int(dv)
return datetime.datetime(
date_values['year'], date_values['month'],
date_values['day'], date_values['hour'],
date_values['minute'], date_values['second'])
try:
# This is slightly gross but it's hard to tell otherwise what the
# string's original type might have been. Be careful who you trust.
converted_value = eval(value)
# Try to handle most built-in types.
if isinstance(
converted_value,
(int, list, tuple, set, dict, float, complex)):
return converted_value
except Exception:
# If it fails (SyntaxError or its ilk) or we don't trust it,
# continue on.
pass
return value
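    # Examples (illustrative):
    #   _to_python('2009-05-09T16:14:00')  -> datetime.datetime(2009, 5, 9, 16, 14)
    #   _to_python('[1, 2, 3]')            -> [1, 2, 3]   (via the eval() above)
    #   _to_python('plain text')           -> 'plain text'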
# DRL_FIXME: Perhaps move to something where, if none of these
# match, call a custom method on the form that returns, per-backend,
# the right type of storage?
DEFAULT_FIELD_MAPPING = {'type': 'string', 'analyzer': 'snowball'}
FIELD_MAPPINGS = {
'edge_ngram': {'type': 'string', 'analyzer': 'edgengram_analyzer'},
'ngram': {'type': 'string', 'analyzer': 'ngram_analyzer'},
'date': {'type': 'date'},
'datetime': {'type': 'date'},
'location': {'type': 'geo_point'},
'boolean': {'type': 'boolean'},
'float': {'type': 'float'},
'long': {'type': 'long'},
'integer': {'type': 'long'},
}
# Sucks that this is almost an exact copy of what's in the Solr backend,
# but we can't import due to dependencies.
class ElasticsearchSearchQuery(BaseSearchQuery):
def matching_all_fragment(self):
return '*:*'
def build_query_fragment(self, field, filter_type, value):
from haystack import connections
query_frag = ''
if not hasattr(value, 'input_type_name'):
# Handle when we've got a ``ValuesListQuerySet``...
if hasattr(value, 'values_list'):
value = list(value)
if isinstance(value, six.string_types):
# It's not an ``InputType``. Assume ``Clean``.
value = Clean(value)
else:
value = PythonData(value)
# Prepare the query using the InputType.
prepared_value = value.prepare(self)
if not isinstance(prepared_value, (set, list, tuple)):
            # Then convert whatever we get back into something Elasticsearch can handle, if needed.
prepared_value = self.backend._from_python(prepared_value)
# 'content' is a special reserved word, much like 'pk' in
# Django's ORM layer. It indicates 'no special field'.
if field == 'content':
index_fieldname = ''
else:
index_fieldname = u'%s:' % connections[self._using].get_unified_index().get_index_fieldname(field)
filter_types = {
'contains': u'%s',
'startswith': u'%s*',
'exact': u'%s',
'gt': u'{%s TO *}',
'gte': u'[%s TO *]',
'lt': u'{* TO %s}',
'lte': u'[* TO %s]',
}
if value.post_process is False:
query_frag = prepared_value
else:
if filter_type in ['contains', 'startswith']:
if value.input_type_name == 'exact':
query_frag = prepared_value
else:
                    # Iterate over terms & incorporate the converted form of each into the query.
terms = []
if isinstance(prepared_value, six.string_types):
for possible_value in prepared_value.split(' '):
terms.append(filter_types[filter_type] % self.backend._from_python(possible_value))
else:
terms.append(filter_types[filter_type] % self.backend._from_python(prepared_value))
if len(terms) == 1:
query_frag = terms[0]
else:
query_frag = u"(%s)" % " AND ".join(terms)
elif filter_type == 'in':
in_options = []
for possible_value in prepared_value:
in_options.append(u'"%s"' % self.backend._from_python(possible_value))
query_frag = u"(%s)" % " OR ".join(in_options)
elif filter_type == 'range':
start = self.backend._from_python(prepared_value[0])
end = self.backend._from_python(prepared_value[1])
query_frag = u'["%s" TO "%s"]' % (start, end)
elif filter_type == 'exact':
if value.input_type_name == 'exact':
query_frag = prepared_value
else:
prepared_value = Exact(prepared_value).prepare(self)
query_frag = filter_types[filter_type] % prepared_value
else:
if value.input_type_name != 'exact':
prepared_value = Exact(prepared_value).prepare(self)
query_frag = filter_types[filter_type] % prepared_value
if len(query_frag) and not isinstance(value, Raw):
if not query_frag.startswith('(') and not query_frag.endswith(')'):
query_frag = "(%s)" % query_frag
return u"%s%s" % (index_fieldname, query_frag)
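    # Roughly what this produces (``title`` and ``pub_date`` are hypothetical
    # fields; exact escaping depends on the input type):
    #
    #   build_query_fragment('title', 'contains', 'django')
    #       -> u'title:(django)'
    #   build_query_fragment('pub_date', 'gte', datetime.date(2009, 5, 9))
    #       -> u'pub_date:([2009-05-09T00:00:00 TO *])'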
def build_alt_parser_query(self, parser_name, query_string='', **kwargs):
if query_string:
kwargs['v'] = query_string
kwarg_bits = []
for key in sorted(kwargs.keys()):
if isinstance(kwargs[key], six.string_types) and ' ' in kwargs[key]:
kwarg_bits.append(u"%s='%s'" % (key, kwargs[key]))
else:
kwarg_bits.append(u"%s=%s" % (key, kwargs[key]))
return u"{!%s %s}" % (parser_name, ' '.join(kwarg_bits))
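    # For illustration (the parser name & arguments here are hypothetical):
    #
    #   build_alt_parser_query('dismax', 'haystack rocks', qf='title')
    #       -> u"{!dismax qf=title v='haystack rocks'}"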
def build_params(self, spelling_query=None, **kwargs):
search_kwargs = {
'start_offset': self.start_offset,
'result_class': self.result_class
}
order_by_list = None
if self.order_by:
if order_by_list is None:
order_by_list = []
for field in self.order_by:
direction = 'asc'
if field.startswith('-'):
direction = 'desc'
field = field[1:]
order_by_list.append((field, direction))
search_kwargs['sort_by'] = order_by_list
if self.date_facets:
search_kwargs['date_facets'] = self.date_facets
if self.distance_point:
search_kwargs['distance_point'] = self.distance_point
if self.dwithin:
search_kwargs['dwithin'] = self.dwithin
if self.end_offset is not None:
search_kwargs['end_offset'] = self.end_offset
if self.facets:
search_kwargs['facets'] = self.facets
if self.fields:
search_kwargs['fields'] = self.fields
if self.highlight:
search_kwargs['highlight'] = self.highlight
if self.models:
search_kwargs['models'] = self.models
if self.narrow_queries:
search_kwargs['narrow_queries'] = self.narrow_queries
if self.query_facets:
search_kwargs['query_facets'] = self.query_facets
if self.within:
search_kwargs['within'] = self.within
if spelling_query:
search_kwargs['spelling_query'] = spelling_query
return search_kwargs
def run(self, spelling_query=None, **kwargs):
"""Builds and executes the query. Returns a list of search results."""
final_query = self.build_query()
search_kwargs = self.build_params(spelling_query, **kwargs)
if kwargs:
search_kwargs.update(kwargs)
results = self.backend.search(final_query, **search_kwargs)
self._results = results.get('results', [])
self._hit_count = results.get('hits', 0)
self._facet_counts = self.post_process_facets(results)
self._spelling_suggestion = results.get('spelling_suggestion', None)
def run_mlt(self, **kwargs):
"""Builds and executes the query. Returns a list of search results."""
if self._more_like_this is False or self._mlt_instance is None:
raise MoreLikeThisError("No instance was provided to determine 'More Like This' results.")
additional_query_string = self.build_query()
search_kwargs = {
'start_offset': self.start_offset,
'result_class': self.result_class,
'models': self.models
}
if self.end_offset is not None:
search_kwargs['end_offset'] = self.end_offset - self.start_offset
results = self.backend.more_like_this(self._mlt_instance, additional_query_string, **search_kwargs)
self._results = results.get('results', [])
self._hit_count = results.get('hits', 0)
class ElasticsearchSearchEngine(BaseEngine):
backend = ElasticsearchSearchBackend
query = ElasticsearchSearchQuery
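# Illustrative configuration wiring this engine up (values are placeholders):
#
#   HAYSTACK_CONNECTIONS = {
#       'default': {
#           'ENGINE': 'haystack.backends.elasticsearch_backend.ElasticsearchSearchEngine',
#           'URL': 'http://127.0.0.1:9200/',
#           'INDEX_NAME': 'haystack',
#       },
#   }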