general: add external links to search results (#20376)
This commit is contained in:
parent
c6a915d95f
commit
273f1b0032
|
@ -14,15 +14,58 @@
|
|||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import datetime
|
||||
|
||||
from haystack.management.commands.update_index import Command as UpdateIndexCommand
|
||||
|
||||
from combo.data.models import Page, ExternalLinkSearchItem
|
||||
from combo.apps.search.models import SearchCell
|
||||
|
||||
|
||||
class Command(UpdateIndexCommand):
    """``update_index`` command that also collects external links.

    Before delegating to the inherited command, it gathers the link data
    exposed by cells (``get_external_links_data``) and stores it as
    ExternalLinkSearchItem objects.
    """

    def add_arguments(self, parser):
        """Register the inherited options plus the collection skip flag."""
        # keep the options declared by the parent command available; the
        # override would otherwise silently drop them.
        super(Command, self).add_arguments(parser)
        parser.add_argument(
            '--skip-external-links-collection', action='store_true', default=False,
            dest='skip_external_links_collection')

    def handle(self, **options):
        """Collect external links (unless skipped) then run the parent command.

        Nothing is done when no search cell uses the '_text' search service.
        """
        if not SearchCell.objects.filter(_search_service='_text').exists():
            # do not index site if there's no matching search cell
            return
        if not options.get('skip_external_links_collection', False):
            self.collect_external_links()
        return super(Command, self).handle(**options)

    def collect_external_links(self):
        """Synchronise ExternalLinkSearchItem objects with the current cells.

        Only cells and pages reporting ``is_visible(user=None)`` are
        considered.  Items that were not saved during this run are deleted.
        """
        # NOTE(review): naive local timestamp; confirm it compares as expected
        # with the auto_now field when time zone support is enabled.
        start_time = datetime.datetime.now()

        # assemble external links data
        links = {}
        for page in Page.objects.all():
            if not page.is_visible(user=None):
                continue
            for cell in page.get_cells():
                if not cell.is_visible(user=None):
                    continue
                for link_data in cell.get_external_links_data():
                    url = link_data['url']
                    if url not in links:
                        # create an entry for that link; the first title
                        # encountered is the one kept as the item title.
                        links[url] = {
                            'title': link_data['title'],
                            'all_titles': [],
                        }
                    # all titles and additional texts will be assembled and indexed
                    links[url]['all_titles'].append(link_data['title'])
                    links[url]['all_titles'].append(link_data.get('text') or '')

        # save data as ExternalLinkSearchItem objects
        for link_url, link_data in links.items():
            link_object, created = ExternalLinkSearchItem.objects.get_or_create(
                url=link_url,
                defaults={'title': link_data['title']})
            link_object.title = link_data['title']
            link_object.text = '\n'.join(link_data['all_titles'])
            # always save, so last_update_timestamp is refreshed even when
            # nothing changed and the object survives the cleanup below.
            link_object.save()

        # remove obsolete objects
        ExternalLinkSearchItem.objects.filter(last_update_timestamp__lt=start_time).delete()
|
||||
|
|
|
@ -82,6 +82,22 @@ class WcsFormCell(CellBase):
|
|||
return
|
||||
return self.cached_title
|
||||
|
||||
def render_for_search(self):
    """Return the text this cell contributes to the search index: none."""
    # the cell is exposed to search through get_external_links_data()
    return ''
|
||||
|
||||
def get_external_links_data(self):
    """Return link data (url, title, text) for the form targeted by the cell.

    An empty list is returned when the cached URL or title is missing.
    The text is built from the "description" and "keywords" entries of the
    cached JSON, when available.
    """
    if not (self.cached_url and self.cached_title):
        return []
    text = ''
    if self.cached_json:
        # use "or" fallbacks: a key present with a null value would
        # otherwise make the joins below fail.
        description = self.cached_json.get('description') or ''
        keywords = self.cached_json.get('keywords') or []
        text = ' '.join([description, ' '.join(keywords)]).strip()
    return [{
        'url': self.cached_url,
        'title': self.cached_title,
        'text': text,
    }]
|
||||
|
||||
|
||||
class WcsCommonCategoryCell(CellBase):
|
||||
is_enabled = classmethod(is_wcs_enabled)
|
||||
|
@ -375,6 +391,18 @@ class WcsFormsOfCategoryCell(WcsCommonCategoryCell, WcsBlurpMixin):
|
|||
|
||||
return super(WcsFormsOfCategoryCell, self).render(context)
|
||||
|
||||
def render_for_search(self):
    """Return the text this cell contributes to the search index: none."""
    # forms are exposed to search through get_external_links_data()
    return ''
|
||||
|
||||
def get_external_links_data(self):
    """Yield link data (url, title, text) for every form returned by get_data.

    get_data is called with {'synchronous': True}; each value of the
    returned mapping is expected to hold a list of forms under "data".
    """
    formdefs = self.get_data({'synchronous': True})
    for site in formdefs.values():
        # a site entry without "data" (or with a null one) has no form to
        # report; iterating over None would raise a TypeError.
        for formdef in site.get('data') or []:
            yield {
                'url': formdef['url'],
                'title': formdef['title'],
                'text': formdef['description']}
|
||||
|
||||
|
||||
@register_cell_class
|
||||
class CategoriesCell(WcsDataBaseCell):
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    # Creates the ExternalLinkSearchItem model of the "data" application.

    # must be applied after migration 0029 of the same application.
    dependencies = [
        ('data', '0029_auto_20171022_1242'),
    ]

    operations = [
        migrations.CreateModel(
            name='ExternalLinkSearchItem',
            fields=[
                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
                ('title', models.CharField(max_length=150, verbose_name='Title')),
                ('text', models.TextField(blank=True)),
                ('url', models.CharField(max_length=200, verbose_name='URL', blank=True)),
                # auto_now: refreshed on every save of the object.
                ('last_update_timestamp', models.DateTimeField(auto_now=True)),
            ],
        ),
    ]
|
|
@ -537,6 +537,9 @@ class CellBase(models.Model):
|
|||
from HTMLParser import HTMLParser
|
||||
return HTMLParser().unescape(strip_tags(self.render(context)))
|
||||
|
||||
def get_external_links_data(self):
    """Return the external links exposed by the cell; none by default."""
    no_links = []
    return no_links
|
||||
|
||||
|
||||
@register_cell_class
|
||||
class TextCell(CellBase):
|
||||
|
@ -707,6 +710,17 @@ class LinkCell(CellBase):
|
|||
from forms import LinkCellForm
|
||||
return LinkCellForm
|
||||
|
||||
def render_for_search(self):
    """Return the text this cell contributes to the search index: none."""
    # the link is exposed to search through get_external_links_data()
    return ''
|
||||
|
||||
def get_external_links_data(self):
    """Return the cell extra context as link data, in a one-item list.

    An empty list is returned when the cell has no url attribute value, or
    when the extra context lacks a truthy "title" or "url".
    """
    if self.url:
        extra_context = self.get_cell_extra_context({})
        if extra_context.get('title') and extra_context.get('url'):
            return [extra_context]
    return []
|
||||
|
||||
|
||||
@register_cell_class
|
||||
class FeedCell(CellBase):
|
||||
|
@ -1150,3 +1164,15 @@ class ConfigJsonCell(JsonCellBase):
|
|||
extra = super(ConfigJsonCell, self).get_cell_extra_context(ctx, **kwargs)
|
||||
ctx.update(extra)
|
||||
return ctx
|
||||
|
||||
|
||||
class ExternalLinkSearchItem(models.Model):
    # Link to an external site.
    #
    # Those are automatically collected by the "update_index" command, which
    # calls get_external_links_data on cells, to be used by the general
    # search engine.
    title = models.CharField(_('Title'), max_length=150)
    text = models.TextField(blank=True)
    url = models.CharField(_('URL'), max_length=200, blank=True)
    # auto_now: refreshed on every save of the object.
    last_update_timestamp = models.DateTimeField(auto_now=True)
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
from haystack import indexes
|
||||
from haystack.exceptions import SkipDocument
|
||||
|
||||
from .models import Page, CellBase
|
||||
from .models import Page, CellBase, ExternalLinkSearchItem
|
||||
|
||||
class PageIndex(indexes.SearchIndex, indexes.Indexable):
|
||||
title = indexes.CharField(model_attr='title', boost=1.5)
|
||||
|
@ -35,3 +35,12 @@ class PageIndex(indexes.SearchIndex, indexes.Indexable):
|
|||
if not obj.is_visible(user=None):
|
||||
raise SkipDocument()
|
||||
return super(PageIndex, self).prepare(obj)
|
||||
|
||||
|
||||
class ExternalLinkSearchIndex(indexes.SearchIndex, indexes.Indexable):
    # Search index for ExternalLinkSearchItem objects; every field is read
    # from the model attribute of the same name.
    title = indexes.CharField(model_attr='title', boost=1.5)
    # document=True: declared as the document field of the index.
    text = indexes.CharField(model_attr='text', document=True)
    # indexed=False: the url is carried along with results, not searched.
    url = indexes.CharField(model_attr='url', indexed=False)

    def get_model(self):
        # model class handled by this index.
        return ExternalLinkSearchItem
|
||||
|
|
|
@ -409,12 +409,13 @@ def api_search(request):
|
|||
sqs = searchqueryset.auto_query(query).highlight()
|
||||
sqs.load_all()
|
||||
hits = []
|
||||
for page in sqs:
|
||||
if page.highlighted['text']:
|
||||
description = '<p>%s</p>' % page.highlighted['text'][0]
|
||||
for hit in sqs:
|
||||
description = None
|
||||
if hit.model_name == 'page' and hit.highlighted['text']:
|
||||
description = '<p>%s</p>' % hit.highlighted['text'][0]
|
||||
hits.append({
|
||||
'text': page.title,
|
||||
'url': page.url,
|
||||
'text': hit.title,
|
||||
'url': hit.url,
|
||||
'description': description,
|
||||
})
|
||||
|
||||
|
|
|
@ -6,13 +6,13 @@ import mock
|
|||
from django.conf import settings
|
||||
from django.test import Client, override_settings
|
||||
from django.test.client import RequestFactory
|
||||
from django.core.management import call_command
|
||||
from django.core.urlresolvers import reverse
|
||||
|
||||
from haystack.exceptions import SkipDocument
|
||||
|
||||
from combo.apps.search.management.commands.update_index import Command
|
||||
from combo.apps.search.models import SearchCell
|
||||
from combo.data.models import Page, JsonCell, TextCell, MenuCell
|
||||
from combo.data.models import Page, JsonCell, TextCell, MenuCell, LinkCell
|
||||
from combo.data.search_indexes import PageIndex
|
||||
|
||||
pytestmark = pytest.mark.django_db
|
||||
|
@ -224,3 +224,33 @@ def test_search_api(app):
|
|||
|
||||
resp = app.get('/api/search/?q=quux', status=200)
|
||||
assert len(resp.json['data']) == 0
|
||||
|
||||
def test_update_index_command(app):
    # start from an empty index and check the command copes with an empty site
    call_command('clear_index', interactive=False)
    call_command('update_index') # empty site

    # a page holding a '_text' search cell is required for indexing to happen
    example_page = Page(title='example page', slug='example-page')
    example_page.save()
    search_cell = SearchCell(page=example_page, _search_service='_text', order=0)
    search_cell.save()

    # no link cell yet: nothing matches
    call_command('update_index')
    hits = app.get('/api/search/?q=foobar', status=200).json['data']
    assert len(hits) == 0

    # a link cell gets collected and is returned as a hit of its own
    first_link = LinkCell(title='foobar', url='http://example.net', page=example_page, order=0)
    first_link.save()
    call_command('update_index')

    hits = app.get('/api/search/?q=foobar', status=200).json['data']
    assert len(hits) == 1
    first_hit = hits[0]
    assert first_hit['text'] == 'foobar'
    assert first_hit['description'] is None
    assert first_hit['url'] == 'http://example.net'

    # a second cell pointing to the same URL is found through its own title
    second_link = LinkCell(title='baz', url='http://example.net', page=example_page, order=0)
    second_link.save()
    call_command('update_index')

    hits = app.get('/api/search/?q=baz', status=200).json['data']
    assert len(hits) == 1
    assert hits[0]['url'] == 'http://example.net'
|
||||
|
|
|
@ -243,6 +243,13 @@ def test_form_cell_save_cache():
|
|||
# make sure cached attributes are removed from serialized pages
|
||||
assert not 'cached_' in json.dumps(page.get_serialized_page())
|
||||
|
||||
# check content provided to search engine
|
||||
assert cell.render_for_search() == ''
|
||||
assert cell.get_external_links_data() == [
|
||||
{'title': 'form title',
|
||||
'url': 'http://127.0.0.1:8999/form-title/',
|
||||
'text': ''}]
|
||||
|
||||
@wcsctl_present
|
||||
def test_category_cell_save_cache():
|
||||
page = Page(title='xxx', slug='test_category_cell_save_cache', template_name='standard')
|
||||
|
@ -416,6 +423,10 @@ def test_forms_of_category_cell_render(context):
|
|||
assert result.index('form title') > result.index('a second form title')
|
||||
assert 'form title' in result and 'a second form title' in result
|
||||
|
||||
# check content provided to search engine
|
||||
assert cell.render_for_search() == ''
|
||||
assert len(list(cell.get_external_links_data())) == 2
|
||||
|
||||
@wcsctl_present
|
||||
def test_current_drafts_cell_render_unlogged(context):
|
||||
page = Page(title='xxx', slug='test_current_drafts_cell_render', template_name='standard')
|
||||
|
|
Loading…
Reference in New Issue