collective.solr/src/collective/solr/search.py

162 lines
7.0 KiB
Python

from logging import getLogger
from time import time
from zope.interface import implements
from zope.component import queryUtility
from Missing import MV
from collective.solr.interfaces import ISolrConnectionConfig
from collective.solr.interfaces import ISolrConnectionManager
from collective.solr.interfaces import ISearch
from collective.solr.parser import SolrResponse
from collective.solr.exceptions import SolrInactiveException
from collective.solr.queryparser import quote
from collective.solr.utils import isWildCard
from collective.solr.utils import prepare_wildcard
# module-level logger used by the search utility below for diagnostics
logger = getLogger('collective.solr.search')
class Search(object):
    """ a search utility for solr """
    implements(ISearch)

    def __init__(self):
        # the connection manager is looked up lazily, see `getManager`
        self.manager = None

    def getManager(self):
        """ return the `ISolrConnectionManager` utility, looking it up
            on first use and remembering it on the instance """
        if self.manager is None:
            self.manager = queryUtility(ISolrConnectionManager)
        return self.manager

    def search(self, query, **parameters):
        """ perform a search with the given querystring and parameters

            `query` is either a query string or a dictionary (e.g. as
            returned by `buildQuery`), whose values get joined with
            spaces.  the keyword arguments are handed to the connection's
            `search` method after defaults for "rows", "fl" and -- if
            highlighting was requested with `hl='true'` -- the "hl.*"
            settings have been filled in from the `ISolrConnectionConfig`
            utility.

            returns the parsed `SolrResponse`.  raises
            `SolrInactiveException` if the manager has no connection. """
        start = time()
        config = queryUtility(ISolrConnectionConfig)
        manager = self.getManager()
        manager.setSearchTimeout()
        connection = manager.getConnection()
        if connection is None:
            raise SolrInactiveException
        if 'rows' not in parameters:
            parameters['rows'] = config.max_results or ''
            # `max_results` may be unset, so it is logged with "%s";
            # "%d" would make the logging call fail for `None`
            logger.info('falling back to "max_results" (%s) without a "rows" '
                        'parameter: %r (%r)',
                        config.max_results, query, parameters)
        if getattr(config, 'highlight_fields', None):
            wanted = parameters.get('hl', 'false') == 'true'
            if wanted and 'hl.fl' not in parameters:
                parameters['hl'] = 'true'
                parameters['hl.fl'] = config.highlight_fields or []
                parameters['hl.simple.pre'] = \
                    config.highlight_formatter_pre or ' '
                parameters['hl.simple.post'] = \
                    config.highlight_formatter_post or ' '
                parameters['hl.fragsize'] = \
                    getattr(config, 'highlight_fragsize', None) or 100
        if 'fl' not in parameters:
            if config.field_list:
                parameters['fl'] = ' '.join(config.field_list)
            else:
                parameters['fl'] = '* score'
        if isinstance(query, dict):
            query = ' '.join(query.values())
        logger.debug('searching for %r (%r)', query, parameters)
        if 'sort' in parameters:    # issue warning for unknown sort indices
            # this check is purely diagnostic, so it must never raise:
            # a sort without direction ("Date") or with several
            # comma-separated clauses ("Date desc, id asc") is accepted
            schema = manager.getSchema() or {}
            for clause in parameters['sort'].split(','):
                tokens = clause.split()
                if not tokens:
                    continue
                index = tokens[0]
                field = schema.get(index, None)
                if field is None or not field.stored:
                    logger.warning(
                        'sorting on non-stored attribute "%s"', index)
        try:
            response = connection.search(q=query, **parameters)
            try:
                results = SolrResponse(response)
            finally:
                # close the response even if parsing it failed
                response.close()
        finally:
            # undo `setSearchTimeout` even if the search failed
            # NOTE(review): presumably restores the default timeout --
            # confirm against the connection manager
            manager.setTimeout(None)
        elapsed = (time() - start) * 1000       # milliseconds
        slow = config.slow_query_threshold
        if slow and elapsed >= slow:
            logger.info('slow query: %d/%d ms for %r (%r)',
                        results.responseHeader['QTime'], elapsed,
                        query, parameters)
        logger.debug('highlighting info: %s',
                     getattr(results, 'highlighting', {}))
        return results

    __call__ = search

    def buildQuery(self, default=None, **args):
        """ helper to build a querystring for simple use-cases

            `default` is the term for the schema's default search field,
            the keyword arguments map field names to search values.
            arguments for fields that are missing from the schema or not
            indexed are dropped, as are values of unsupported types.

            returns a dictionary mapping each field name (`None` for the
            default field) to its query fragment, or an empty dictionary
            as soon as a named argument has an empty value. """
        logger.debug('building query for "%r", %r', default, args)
        schema = self.getManager().getSchema() or {}
        defaultSearchField = getattr(schema, 'defaultSearchField', None)
        args[None] = default
        falses = '0', 'False', MV

        def isTrue(v):
            """ interpret a value given for a boolean field """
            return bool(v) and v not in falses

        def quoteItem(term):
            """ list items should be treated as literals, but
                nevertheless only get quoted when necessary """
            if isinstance(term, unicode):
                term = term.encode('utf-8')
            quoted = quote(term)
            if not quoted.startswith('"') and not quoted == term:
                quoted = quote('"' + term + '"')
            return quoted

        query = {}
        for name, value in sorted(args.items()):
            field = schema.get(name or defaultSearchField, None)
            if field is None or not field.indexed:
                logger.info('dropping unknown search attribute "%s" '
                            ' (%r) for query: %r', name, value, args)
                continue
            if isinstance(value, bool):
                value = str(value).lower()
            elif not value:     # solr doesn't like empty fields (+foo:"")
                if not name:
                    continue
                logger.info(
                    'empty search term form "%s:%s", aborting buildQuery',
                    name, value)
                return {}
            elif field.class_ == 'solr.BoolField':
                if not isinstance(value, (tuple, list)):
                    value = [value]
                value = set(isTrue(v) for v in value)
                if len(value) != 1:
                    # both true and false were given, i.e. "true or
                    # false", which doesn't restrict anything -- skip
                    continue
                value = str(value.pop()).lower()
            elif isinstance(value, (tuple, list)):
                value = '(%s)' % ' OR '.join(map(quoteItem, value))
            elif isinstance(value, set):    # sets are taken literally
                if len(value) == 1:
                    query[name] = ''.join(value)
                else:
                    query[name] = '(%s)' % ' OR '.join(value)
                continue
            elif isinstance(value, basestring):
                if field.class_ == 'solr.TextField':
                    if isWildCard(value):
                        value = prepare_wildcard(value)
                    value = quote(value, textfield=True)
                    # if we have an intra-word hyphen, we need quotes
                    if '\\-' in value or '\\+' in value:
                        if value[0] != '"':
                            value = '"%s"' % value
                else:
                    value = quote(value)
                # Solr 4.0 added regular expression support, which means
                # that '/' is now a special character and must be escaped
                # if searching for literal forward slash.
                # NOTE(review): applied to all string values here; confirm
                # this was not meant for non-text fields only
                if '/' in value:
                    value = value.replace('/', '\\/')
                if not value:   # don't search for empty strings, even quoted
                    continue
            else:
                logger.info('skipping unsupported value "%r" (%s)',
                            value, name)
                continue
            if name is None:
                # terms for the default field only get the "required"
                # operator unless they already carry an operator
                if value and value[0] not in '+-':
                    value = '+%s' % value
            else:
                value = '+%s:%s' % (name, value)
            query[name] = value
        logger.debug('built query "%s"', query)
        return query