Add levenshtein_distance setting to control panel and use it for fuzzy search.

This commit is contained in:
Timo Stollenwerk 2013-07-10 08:11:45 +02:00
parent 8ac769e97c
commit f0765648ee
7 changed files with 54 additions and 16 deletions

View File

@ -4,9 +4,8 @@ Changelog
4.0 - unreleased
------------------
- Support fuzzy search for SearchableText, currently without an admin GUI
to set the required config value.
[csenger]
- Support fuzzy search for SearchableText.
[csenger,timo]
- Make sure slashes are properly escaped in the search query. Solr 4.0 added
regular expression support, which means that '/' is now a special character

View File

@ -174,7 +174,8 @@ class SolrControlPanelAdapter(SchemaAdapterBase):
if util is not None:
util.filter_queries = value
filter_queries = property(getFilterQueryParameters, setFilterQueryParameters)
filter_queries = property(
getFilterQueryParameters, setFilterQueryParameters)
def getSlowQueryThreshold(self):
util = queryUtility(ISolrConnectionConfig)
@ -185,7 +186,8 @@ class SolrControlPanelAdapter(SchemaAdapterBase):
if util is not None:
util.slow_query_threshold = value
slow_query_threshold = property(getSlowQueryThreshold, setSlowQueryThreshold)
slow_query_threshold = property(
getSlowQueryThreshold, setSlowQueryThreshold)
def getEffectiveSteps(self):
util = queryUtility(ISolrConnectionConfig)
@ -229,7 +231,8 @@ class SolrControlPanelAdapter(SchemaAdapterBase):
if util is not None:
util.highlight_formatter_pre = value
highlight_formatter_pre = property(getHighlightFormatterPre, setHighlightFormatterPre)
highlight_formatter_pre = property(
getHighlightFormatterPre, setHighlightFormatterPre)
def getHighlightFormatterPost(self):
util = queryUtility(ISolrConnectionConfig)
@ -240,7 +243,8 @@ class SolrControlPanelAdapter(SchemaAdapterBase):
if util is not None:
util.highlight_formatter_post = value
highlight_formatter_post = property(getHighlightFormatterPost, setHighlightFormatterPost)
highlight_formatter_post = property(
getHighlightFormatterPost, setHighlightFormatterPost)
def getHighlightFragsize(self):
util = queryUtility(ISolrConnectionConfig)
@ -264,12 +268,25 @@ class SolrControlPanelAdapter(SchemaAdapterBase):
field_list = property(getFieldList, setFieldList)
def getLevenshteinDistance(self):
    """ read `levenshtein_distance` from the `ISolrConnectionConfig`
        utility; falls back to an empty string when the looked-up
        object does not provide that attribute """
    config = queryUtility(ISolrConnectionConfig)
    return getattr(config, 'levenshtein_distance', '')
def setLevenshteinDistance(self, value):
    """ store `value` as `levenshtein_distance` on the
        `ISolrConnectionConfig` utility; nothing is written when the
        utility lookup returns `None` """
    config = queryUtility(ISolrConnectionConfig)
    if config is None:
        return
    config.levenshtein_distance = value
# Expose the getter/setter pair as one read/write attribute on the adapter.
levenshtein_distance = property(
getLevenshteinDistance, setLevenshteinDistance)
class SolrControlPanel(ControlPanelForm):
form_fields = FormFields(ISolrSchema)
label = _('label_solr_settings', default='Solr settings')
description = _('help_solr_settings',
default='Settings to enable and configure Solr integration.')
description = _(
'help_solr_settings',
default='Settings to enable and configure Solr integration.')
form_name = _('label_solr_settings', default='Solr settings')

View File

@ -46,6 +46,7 @@ class SolrConfigXMLAdapter(XMLAdapterBase):
self.context.highlight_formatter_pre = ''
self.context.highlight_formatter_post = ''
self.context.highlight_fragsize = 0
self.context.levenshtein_distance = 0
def _initProperties(self, node):
elems = node.getElementsByTagName('connection')
@ -132,6 +133,9 @@ class SolrConfigXMLAdapter(XMLAdapterBase):
for elem in child.getElementsByTagName('parameter'):
value.append(elem.getAttribute('name'))
self.context.field_list = tuple(map(str, value))
elif child.nodeName == 'levenshtein_distance':
# NOTE(review): the value is parsed with int(), while ISolrSchema declares
# this field as Float and the export step writes str(value); a stored
# value such as "0.9" would make int() raise ValueError -- confirm whether
# float() is intended here.
value = int(str(child.getAttribute('value')))
self.context.levenshtein_distance = value
def _createNode(self, name, value):
node = self._doc.createElement(name)
@ -191,6 +195,8 @@ class SolrConfigXMLAdapter(XMLAdapterBase):
append(create('highlight_fragsize', str(self.context.highlight_fragsize)))
field_list = self._doc.createElement('field-list')
append(field_list)
append(create('levenshtein_distance',
str(self.context.levenshtein_distance)))
for name in self.context.field_list:
param = self._doc.createElement('parameter')
param.setAttribute('name', name)

View File

@ -242,6 +242,16 @@ class ISolrSchema(Interface):
required=False
)
# Optional float setting shown in the control panel as "Levenshtein distance".
# NOTE(review): the help text asks for a value between 0 and 1, but no
# min/max is passed to Float here, so this definition does not enforce the
# range -- confirm whether that is intended.
levenshtein_distance = Float(
title=_('label_levenshtein_distance',
default=u'Levenshtein distance'),
description=_(
'help_levenshtein_distance',
default=u'Specify a value between 0 and 1.'
),
required=False,
)
class ISolrConnectionConfig(ISolrSchema):
""" utility to hold the connection configuration for the solr server """

View File

@ -4,7 +4,6 @@ from DateTime import DateTime
from collective.solr.interfaces import ISolrConnectionConfig
from collective.solr.queryparser import quote
from collective.solr.utils import isSimpleTerm
from collective.solr.utils import isSimpleSearch
from collective.solr.utils import isWildCard
from collective.solr.utils import splitSimpleSearch
@ -21,9 +20,10 @@ sort_aliases = {
'sortable_title': 'Title',
}
query_args = ('range',
'operator',
'depth',
query_args = (
'range',
'operator',
'depth',
)
ignored = 'use_solr', '-C'
@ -33,8 +33,8 @@ def iso8601date(value):
""" convert `DateTime` to iso 8601 date format """
if isinstance(value, DateTime):
v = value.toZone('UTC')
value = '%04d-%02d-%02dT%02d:%02d:%06.3fZ' % (v.year(),
v.month(), v.day(), v.hour(), v.minute(), v.second())
value = '%04d-%02d-%02dT%02d:%02d:%06.3fZ' % (
v.year(), v.month(), v.day(), v.hour(), v.minute(), v.second())
return value
@ -62,7 +62,7 @@ def makeSimpleExpressions(term, levenstein_distance):
def mangleSearchableText(value, config):
pattern = getattr(config, 'search_pattern', '')
levenstein_distance = getattr(config, 'levenstein_distance', 0)
levenstein_distance = getattr(config, 'levenshtein_distance', 0)
value_parts = []
base_value_parts = []

View File

@ -33,5 +33,6 @@
value="]" />
<highlight_fragsize
value="100" />
<levenshtein_distance value="0" />
</settings>
</object>

View File

@ -55,6 +55,8 @@ active without *having* it active.
100
>>> config.field_list
[]
>>> config.levenshtein_distance
0
Viewing the site control panel
@ -112,6 +114,7 @@ Make some changes
>>> self.browser.getControl(name='form.field_list.0.').value = 'Title'
>>> self.browser.getControl(name='form.field_list.add').click()
>>> self.browser.getControl(name='form.field_list.1.').value = 'effective'
>>> self.browser.getControl(name='form.levenshtein_distance').value = 0.9
Click the save button:
@ -168,6 +171,8 @@ Make sure the changes have been applied correctly to the tool:
150
>>> config.field_list
[u'Title', u'effective']
>>> config.levenshtein_distance
0.9
Now that the connection is active we can also select more filter query
parameters from the complete list of Solr indexes, provided that we use the