general: redo full text search using querysets (#33632)

This commit is contained in:
Frédéric Péters 2020-01-20 16:31:56 +01:00
parent 06417b1ff9
commit 7698d8a398
21 changed files with 355 additions and 307 deletions

View File

@ -15,6 +15,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import django.apps
from django.core.urlresolvers import reverse
from django.utils.translation import ugettext_lazy as _
from .engines import engines
@ -28,4 +29,22 @@ class AppConfig(django.apps.AppConfig):
from . import urls
return urls.urlpatterns
def hourly(self):
    """Rebuild the search index by running ``utils.index_site``."""
    # imported here rather than at module level, like the other helpers
    # used by this app configuration
    from . import utils

    utils.index_site()
def ready(self):
    """Register this application's search engines provider."""
    # register built-in search engine for page contents
    # (the bound method itself is registered, not the dictionary it returns)
    engines.register(self.get_search_engines)
def get_search_engines(self):
    """Return the built-in search engines, keyed by slug.

    The only entry, ``_text``, is an internal engine: it carries a
    ``function`` (``utils.search_site``) instead of a ``url``.
    """
    from . import utils

    page_contents_engine = {}
    page_contents_engine['function'] = utils.search_site
    page_contents_engine['label'] = _('Page Contents')
    return {'_text': page_contents_engine}
default_app_config = 'combo.apps.search.AppConfig'

View File

@ -1,78 +0,0 @@
# combo - content management system
# Copyright (C) 2017 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from django.utils.timezone import now
from haystack.management.commands.update_index import Command as UpdateIndexCommand
from combo.data.models import Page, ExternalLinkSearchItem
from combo.apps.search.models import SearchCell
class Command(UpdateIndexCommand):
    """haystack ``update_index`` command extended with external links.

    Before delegating to the parent command, external links exposed by the
    visible cells of visible pages are stored as ExternalLinkSearchItem
    objects.
    """

    def add_arguments(self, parser):
        """Add the --skip-external-links-collection flag to the parent options."""
        super(Command, self).add_arguments(parser)
        parser.add_argument(
            '--skip-external-links-collection',
            dest='skip_external_links_collection',
            action='store_true',
            default=False)

    def handle(self, **options):
        """Collect external links (unless skipped) then run the parent command."""
        has_text_search_cell = any(SearchCell.get_cells_by_search_service('_text'))
        if not has_text_search_cell:
            # do not index site if there's no matching search cell
            return
        skip_collection = options.get('skip_external_links_collection', False)
        if not skip_collection:
            self.collect_external_links(options)
        return super(Command, self).handle(**options)

    def collect_external_links(self, options):
        """Synchronise ExternalLinkSearchItem objects with the links found in cells."""
        start_time = now()
        if options.get('remove'):
            ExternalLinkSearchItem.objects.all().delete()

        # assemble external links data, keyed by URL
        links = {}
        for page in Page.objects.filter(sub_slug=''):
            if not page.is_visible(user=None):
                continue
            for cell in page.get_cells():
                if not cell.is_visible(user=None):
                    continue
                for link_data in cell.get_external_links_data():
                    url = link_data['url']
                    entry = links.get(url)
                    if entry is None:
                        # first time this link is seen, create its entry.
                        entry = {}
                        entry['title'] = link_data['title']
                        entry['all_texts'] = []
                        links[url] = entry
                    else:
                        # the link is already known, its title is only
                        # kept as additional text.
                        entry['all_texts'].append(link_data['title'])
                    # additional texts will be assembled and indexed
                    entry['all_texts'].append(link_data.get('text') or '')

        # save data as ExternalLinkSearchItem objects
        for link_url, link_data in links.items():
            link_title = link_data['title']
            link_object, _created = ExternalLinkSearchItem.objects.get_or_create(
                url=link_url,
                defaults={'title': link_title})
            link_object.title = link_title
            link_object.text = '\n'.join(link_data['all_texts'])
            link_object.save()

        # remove obsolete objects (those not saved during this run)
        outdated = ExternalLinkSearchItem.objects.filter(last_update_timestamp__lt=start_time)
        outdated.delete()

View File

@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.17 on 2020-01-20 15:30
from __future__ import unicode_literals
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """Create the IndexedCell model of the search application."""

    dependencies = [
        ('data', '0043_delete_externallinksearchitem'),
        ('auth', '0008_alter_user_username_max_length'),
        ('contenttypes', '0002_remove_content_type_name'),
        ('search', '0005_searchcell_autofocus'),
    ]

    operations = [
        migrations.CreateModel(
            name='IndexedCell',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('cell_pk', models.PositiveIntegerField(null=True)),
                ('url', models.CharField(blank=True, max_length=500, null=True)),
                ('title', models.CharField(blank=True, max_length=500, null=True)),
                ('indexed_text', models.TextField(blank=True, null=True)),
                ('public_access', models.BooleanField(default=False)),
                ('last_update_timestamp', models.DateTimeField(auto_now=True)),
                ('cell_type', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='contenttypes.ContentType')),
                ('excluded_groups', models.ManyToManyField(blank=True, related_name='_indexedcell_excluded_groups_+', to='auth.Group')),
                ('page', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='data.Page')),
                ('restricted_groups', models.ManyToManyField(blank=True, related_name='_indexedcell_restricted_groups_+', to='auth.Group')),
            ],
        ),
    ]

View File

@ -16,21 +16,21 @@
import os
from django.conf import settings
from django.contrib.auth.models import Group
from django.contrib.contenttypes import fields
from django.contrib.contenttypes.models import ContentType
from django.db import models
from django.utils.translation import ugettext_lazy as _
from django import template
from django.http import HttpResponse
from django.core.exceptions import PermissionDenied
from django.core.urlresolvers import reverse
from django.utils.http import quote
from django.template import RequestContext, Template
from jsonfield import JSONField
from haystack import connections
from combo.utils import requests
from combo.data.models import CellBase
from combo.data.models import CellBase, Page
from combo.data.library import register_cell_class
from combo.utils import get_templated_url
@ -69,7 +69,7 @@ class SearchCell(CellBase):
services = []
for service_slug in self._search_services.get('data') or []:
service = engines.get(service_slug)
if service and service.get('url'):
if service and (service.get('url') or service.get('function')):
service['slug'] = service_slug
services.append(service)
return services
@ -141,30 +141,33 @@ class SearchCell(CellBase):
if not query:
return render_response(service)
url = get_templated_url(service['url'],
context={'request': request, 'q': query, 'search_service': service})
url = url % {'q': quote(query.encode('utf-8'))} # if url contains %(q)s
if url.startswith('/'):
url = request.build_absolute_uri(url)
if service.get('function'): # internal search engine
results = {'data': service['function'](request, query)}
else:
url = get_templated_url(service['url'],
context={'request': request, 'q': query, 'search_service': service})
url = url % {'q': quote(query.encode('utf-8'))} # if url contains %(q)s
if url.startswith('/'):
url = request.build_absolute_uri(url)
if not url:
return render_response(service)
if not url:
return render_response(service)
kwargs = {}
kwargs['cache_duration'] = service.get('cache_duration', 0)
kwargs['remote_service'] = 'auto' if service.get('signature') else None
# don't automatically add user info to query string, if required it can
# be set explicitly in the URL template in the engine definition (via
# {{user_nameid}} or {{user_email}}).
kwargs['without_user'] = True
# don't send error traces on HTTP errors
kwargs['log_errors'] = 'warn'
kwargs = {}
kwargs['cache_duration'] = service.get('cache_duration', 0)
kwargs['remote_service'] = 'auto' if service.get('signature') else None
# don't automatically add user info to query string, if required it can
# be set explicitly in the URL template in the engine definition (via
# {{user_nameid}} or {{user_email}}).
kwargs['without_user'] = True
# don't send error traces on HTTP errors
kwargs['log_errors'] = 'warn'
response = requests.get(url, **kwargs)
try:
results = response.json()
except ValueError:
return render_response(service)
response = requests.get(url, **kwargs)
try:
results = response.json()
except ValueError:
return render_response(service)
if service.get('data_key'):
results['data'] = results.get(service['data_key']) or []
@ -179,10 +182,25 @@ class SearchCell(CellBase):
for hit in results.get('data') or []:
for k, v in hit_templates.items():
hit[k] = v.render(RequestContext(request, hit))
return render_response(service, results)
def has_text_search_service(self):
return '_text' in self._search_services.get('data', [])
def missing_index(self):
return not os.path.exists(connections['default'].get_backend().path)
return IndexedCell.objects.all().count() == 0
class IndexedCell(models.Model):
    """Entry of the full text search index.

    Entries are created by ``utils.index_site`` and queried by
    ``utils.search_site``; an entry holds either the searchable text of a
    cell or an external link exposed by a cell.
    """

    # generic relation to the cell this entry was built from
    cell_type = models.ForeignKey(ContentType, on_delete=models.CASCADE)
    cell_pk = models.PositiveIntegerField(null=True)
    cell = fields.GenericForeignKey('cell_type', 'cell_pk')
    # page of the cell; index_site only sets it for cell content entries,
    # not for external link entries
    page = models.ForeignKey(Page, on_delete=models.CASCADE, blank=True, null=True)
    # URL and title given to search hits (page URL/title, or link URL/title)
    url = models.CharField(max_length=500, blank=True, null=True)
    title = models.CharField(max_length=500, blank=True, null=True)
    # text searched by search_site, along with the title
    indexed_text = models.TextField(blank=True, null=True)
    # access information, filled by utils.set_cell_access; anonymous users
    # only get entries with public_access set
    public_access = models.BooleanField(default=False)
    # logged-in users get entries with no restricted group or with a
    # restricted group they belong to, minus entries excluding their groups
    restricted_groups = models.ManyToManyField(Group, blank=True, related_name='+')
    excluded_groups = models.ManyToManyField(Group, blank=True, related_name='+')
    # auto_now: refreshed on every save
    last_update_timestamp = models.DateTimeField(auto_now=True)

111
combo/apps/search/utils.py Normal file
View File

@ -0,0 +1,111 @@
# combo - content management system
# Copyright (C) 2014-2020 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from django.conf import settings
from django.contrib.contenttypes.models import ContentType
from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector
from combo.data.models import CellBase
from django.db import connection
from django.db.models import Q
from django.db.transaction import atomic
from .models import IndexedCell
def set_cell_access(indexed_cell, cell):
    """Copy access rules of ``cell`` (and its page) to ``indexed_cell``.

    The entry is public when both the page and the cell are public.
    Otherwise it is restricted to the groups of the cell, and the groups of
    the page are either excluded (cell restricted to unlogged users) or
    added to the restricted groups.  The entry is saved at the end; callers
    in this module save it once before calling, so the group relations can
    be written.
    """
    page = cell.page
    is_public = bool(page.public and cell.public)
    indexed_cell.public_access = is_public

    # start from a clean state
    indexed_cell.excluded_groups.clear()
    indexed_cell.restricted_groups.clear()

    if is_public:
        indexed_cell.save()
        return

    indexed_cell.restricted_groups.set(cell.groups.all())
    if cell.restricted_to_unlogged:
        indexed_cell.excluded_groups.set(page.groups.all())
    else:
        for page_group in page.groups.all():
            indexed_cell.restricted_groups.add(page_group)
    indexed_cell.save()
@atomic
def index_site():
    """Rebuild the whole search index, in a single transaction.

    All IndexedCell objects are deleted, then for every cell of every cell
    class (cells of snapshot pages and cells in placeholders whose name
    starts with '_' are left out):

    * an entry pointing to the page is created if the cell renders some
      text for search;
    * an entry is created for each external link the cell exposes, a single
      entry being shared by all links to the same URL.

    Cells whose rendering raises an exception are skipped altogether.
    """
    IndexedCell.objects.all().delete()
    external_urls = {}  # external link URL -> IndexedCell of that link
    for klass in CellBase.get_cell_classes():
        cells = klass.objects.filter(page__snapshot__isnull=True).exclude(placeholder__startswith='_')
        for cell in cells:
            cell_type = ContentType.objects.get_for_model(cell)
            indexed_cell = IndexedCell(cell_type=cell_type, cell_pk=cell.id)
            try:
                indexed_cell.indexed_text = cell.render_for_search()
            except Exception:  # ignore rendering error
                continue
            if indexed_cell.indexed_text:
                indexed_cell.page_id = cell.page_id
                indexed_cell.url = cell.page.get_online_url()
                indexed_cell.title = cell.page.title
                # the entry has to be saved before groups can be attached
                indexed_cell.save()
                set_cell_access(indexed_cell, cell)

            for link_data in cell.get_external_links_data():
                # index external links; entries are looked up by the URL of
                # the link itself (not by the URL of the page entry).
                link_url = link_data['url']
                link_cell = external_urls.get(link_url)
                if link_cell is None:
                    # create an entry for that link.
                    link_cell = IndexedCell(cell_type=cell_type, cell_pk=cell.id)
                    link_cell.save()
                    set_cell_access(link_cell, cell)
                    link_cell.url = link_url
                    link_cell.title = link_data['title']
                    link_cell.indexed_text = link_data.get('text') or ''
                    external_urls[link_url] = link_cell
                else:
                    # if that link already exists, add detailed texts
                    # NOTE(review): the entry keeps the access rules of the
                    # first cell that referenced this URL.
                    link_cell.indexed_text += ' ' + link_data['title']
                    # parentheses matter: a missing text must give '', not
                    # an attempt to concatenate None
                    link_cell.indexed_text += ' ' + (link_data.get('text') or '')
                link_cell.save()
def search_site(request, query):
    """Search indexed cells and return at most 10 hits for ``query``.

    On PostgreSQL the search is a ranked full text search using the
    ``POSTGRESQL_FTS_SEARCH_CONFIG`` configuration (hits with a rank under
    0.3 are dropped); on other databases it is a case-insensitive substring
    match on the indexed text and the title.

    Results are limited to what ``request.user`` can access, and a given URL
    is only reported once.

    Returns a list of dictionaries with ``text`` (title of the entry),
    ``rank`` (None when the backend gives no rank) and ``url`` keys.
    """
    if connection.vendor == 'postgresql':
        config = settings.POSTGRESQL_FTS_SEARCH_CONFIG
        vector = (
            SearchVector('title', config=config, weight='A')
            + SearchVector('indexed_text', config=config, weight='A'))
        # the query has to use the same configuration as the vectors,
        # otherwise query terms and indexed text are stemmed differently.
        search_query = SearchQuery(query, config=config)
        qs = IndexedCell.objects.annotate(
            rank=SearchRank(vector, search_query)).filter(rank__gte=0.3).order_by('-rank')
    else:
        qs = IndexedCell.objects.filter(
            Q(indexed_text__icontains=query) | Q(title__icontains=query))

    if request.user.is_anonymous:
        qs = qs.exclude(public_access=False)
    else:
        user_groups = request.user.groups.all()
        qs = qs.filter(
            Q(restricted_groups=None) |
            Q(restricted_groups__in=user_groups))
        qs = qs.exclude(excluded_groups__in=user_groups)

    hits = []
    seen_urls = set()
    for hit in qs:
        # several entries may share a URL (and the joins on groups may
        # yield the same entry several times), keep the first one only
        if hit.url in seen_urls:
            continue
        seen_urls.add(hit.url)
        hits.append({
            'text': hit.title,
            'rank': getattr(hit, 'rank', None),
            'url': hit.url,
        })
        if len(hits) == 10:
            break
    return hits

View File

@ -15,23 +15,8 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from django.apps import AppConfig
from django.core.urlresolvers import reverse
from django.utils.translation import ugettext_lazy as _
class DataConfig(AppConfig):
    """Application configuration of combo.data.

    It registers the '_text' search engine, served by the 'api-search' URL.
    """

    name = 'combo.data'
    verbose_name = 'data'

    def ready(self):
        """Register the search engines provider of this application."""
        # register built-in search engine for page contents
        from combo.apps.search import engines

        engines.register(self.get_search_engines)

    def get_search_engines(self):
        """Return the built-in search engines, keyed by slug."""
        search_url = reverse('api-search') + '?q=%(q)s'
        page_contents_engine = {
            'url': search_url,
            'label': _('Page Contents'),
        }
        return {'_text': page_contents_engine}

View File

@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.17 on 2020-01-20 15:30
from __future__ import unicode_literals
from django.db import migrations
class Migration(migrations.Migration):
    """Delete the ExternalLinkSearchItem model."""

    dependencies = [
        ('data', '0042_page_creation_timestamp'),
    ]

    operations = [
        migrations.DeleteModel(
            name='ExternalLinkSearchItem',
        ),
    ]

View File

@ -755,10 +755,6 @@ class CellBase(six.with_metaclass(CellMeta, models.Model)):
return ''
if self.user_dependant:
return ''
if not self.page.is_visible(user=None):
return ''
if not self.is_visible(user=None):
return ''
request = RequestFactory().get(self.page.get_online_url())
request.user = None # compat
context = {
@ -1474,18 +1470,6 @@ class ConfigJsonCell(JsonCellBase):
return context
class ExternalLinkSearchItem(models.Model):
    # Link to an external site.
    #
    # Those are automatically collected by the "update_index" command, which
    # calls get_external_links_data on all available cells, to be used by
    # the general search engine.

    title = models.CharField(_('Title'), max_length=150)
    text = models.TextField(blank=True)
    url = models.CharField(_('URL'), max_length=200, blank=True)
    # auto_now: refreshed on every save
    last_update_timestamp = models.DateTimeField(auto_now=True)
@receiver(pre_save, sender=Page)
def create_redirects(sender, instance, raw, **kwargs):
if raw or not instance.id or instance.snapshot_id:

View File

@ -1,46 +0,0 @@
# combo - content management system
# Copyright (C) 2014-2017 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from haystack import indexes
from haystack.exceptions import SkipDocument
from .models import Page, CellBase, ExternalLinkSearchItem
class PageIndex(indexes.SearchIndex, indexes.Indexable):
    """haystack index of pages; the document text comes from a template."""

    title = indexes.CharField(model_attr='title', boost=1.5)
    text = indexes.CharField(
        document=True,
        use_template=True,
        template_name='combo/search/page.txt')
    url = indexes.CharField(indexed=False)

    def get_model(self):
        """Return the indexed model, Page."""
        return Page

    def prepare_url(self, obj):
        """Return the online URL of the page, stored in the ``url`` field."""
        online_url = obj.get_online_url()
        return online_url

    def prepare(self, obj):
        """Prepare the page for indexing, skipping pages not visible to user=None."""
        if obj.is_visible(user=None):
            return super(PageIndex, self).prepare(obj)
        raise SkipDocument()
class ExternalLinkSearchIndex(indexes.SearchIndex, indexes.Indexable):
    """haystack index of ExternalLinkSearchItem objects."""

    title = indexes.CharField(model_attr='title', boost=1.5)
    # the text of the item is the indexed document
    text = indexes.CharField(model_attr='text', document=True)
    # stored but not indexed
    url = indexes.CharField(model_attr='url', indexed=False)

    def get_model(self):
        """Return the indexed model, ExternalLinkSearchItem."""
        return ExternalLinkSearchItem

View File

@ -1,7 +0,0 @@
{% autoescape off %}
{% for cell in object.get_cells %}
{% if cell.placeholder|first != '_' %} {# ignore technical placeholders #}
{{ cell.render_for_search }}
{% endif %}
{% endfor %}
{% endautoescape %}

View File

@ -21,7 +21,6 @@ from . import views
urlpatterns = [
url(r'^api/menu-badges/$', views.menu_badges),
url(r'^api/search/$', views.api_search, name='api-search'),
url(r'^ajax/cell/(?P<page_pk>\w+)/(?P<cell_reference>[\w_-]+)/$',
views.ajax_page_cell, name='combo-public-ajax-page-cell'),
url(r'^snapshot/(?P<pk>\w+)/$', manager_required(views.snapshot), name='combo-snapshot-view'),

View File

@ -40,9 +40,6 @@ from django.views.decorators.csrf import csrf_exempt
from django.utils.translation import ugettext as _
from django.forms.widgets import Media
from haystack.inputs import AutoQuery
from haystack.query import SearchQuerySet, SQ
if 'mellon' in settings.INSTALLED_APPS:
from mellon.utils import get_idps
else:
@ -577,31 +574,6 @@ def menu_badges(request):
menu_badges.mellon_no_passive = True
def api_search(request):
    """Return haystack hits for the ``q`` GET parameter, as JSON.

    Raises Http404 unless at least one search cell using the '_text'
    service is visible to the requesting user.  The response is a JSON
    object with a ``data`` list of hits (``text``, ``url``, ``description``).
    """
    text_search_cells = SearchCell.get_cells_by_search_service('_text')
    if not any(cell.is_visible(request.user) for cell in text_search_cells):
        raise Http404()

    query = request.GET.get('q') or ''
    content_or_title = SQ(content=AutoQuery(query)) | SQ(title=AutoQuery(query))
    sqs = SearchQuerySet().filter(content_or_title)
    sqs = sqs.highlight()
    # NOTE(review): the value returned by load_all() is not kept
    sqs.load_all()

    hits = []
    for hit in sqs:
        # only pages get a description, made of their first highlighted excerpt
        if hit.model_name == 'page' and hit.highlighted['text']:
            description = '<p>%s</p>' % hit.highlighted['text'][0]
        else:
            description = None
        hits.append({
            'text': hit.title,
            'url': hit.url,
            'description': description,
        })

    payload = json.dumps({'data': hits})
    return HttpResponse(payload, content_type='application/json')
def snapshot(request, *args, **kwargs):
snapshot = PageSnapshot.objects.get(id=kwargs['pk'])
return publish_page(request, snapshot.get_page())

View File

@ -77,7 +77,6 @@ INSTALLED_APPS = (
'combo.apps.pwa',
'combo.apps.gallery',
'combo.apps.kb',
'haystack',
'xstatic.pkg.josefinsans',
'xstatic.pkg.leaflet',
'xstatic.pkg.opensans',
@ -189,13 +188,6 @@ CKEDITOR_CONFIGS = {
CKEDITOR_CONFIGS['small'] = copy.copy(CKEDITOR_CONFIGS['default'])
CKEDITOR_CONFIGS['small']['height'] = 150
HAYSTACK_CONNECTIONS = {
'default': {
'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine',
'PATH': os.path.join(BASE_DIR, 'whoosh_index'),
},
}
# from sorl.thumbnail -- https://sorl-thumbnail.readthedocs.io/en/latest/reference/settings.html
THUMBNAIL_PRESERVE_FORMAT = True
THUMBNAIL_FORCE_OVERWRITE = False
@ -264,6 +256,7 @@ MELLON_IDENTITY_PROVIDERS = []
# search services
COMBO_SEARCH_SERVICES = {}
POSTGRESQL_FTS_SEARCH_CONFIG = 'french'
# mapping of payment modes
LINGO_NO_ONLINE_PAYMENT_REASONS = {}

View File

@ -2,5 +2,3 @@
/sbin/runuser -u combo /usr/bin/combo-manage -- tenant_command cron --all-tenants
/sbin/runuser -u combo /usr/bin/combo-manage -- tenant_command clearsessions --all-tenants
# update_index cannot be used due to some bug in haystack/whoosh (#30509)
/sbin/runuser -u combo /usr/bin/combo-manage -- tenant_command rebuild_index --noinput --all-tenants -v0

3
debian/control vendored
View File

@ -21,14 +21,13 @@ Depends: ${misc:Depends}, ${python3:Depends},
python3-xstatic-opensans,
python3-xstatic-roboto-fontface (>= 0.5.0.0),
python3-eopayment (>= 1.35),
python3-django-haystack (>= 2.4.0),
python3-django-ratelimit,
python3-sorl-thumbnail,
python3-pil,
python3-pywebpush,
python3-pygal,
python3-lxml
Recommends: python3-django-mellon, python3-whoosh
Recommends: python3-django-mellon
Conflicts: python-lingo
Breaks: combo (<< 2.34.post2)
Description: Portal Management System (Python module)

View File

@ -11,7 +11,5 @@ XStatic_roboto-fontface
eopayment>=1.13
python-dateutil
djangorestframework>=3.3, <3.7
django-haystack
whoosh
sorl-thumbnail
pyproj

View File

@ -163,9 +163,7 @@ setup(
'eopayment>=1.41',
'python-dateutil',
'djangorestframework>=3.3, <3.7',
'django-haystack',
'django-ratelimit<3',
'whoosh',
'sorl-thumbnail',
'Pillow',
'pyproj',

View File

@ -44,9 +44,6 @@ COMBO_DASHBOARD_ENABLED = True
import tempfile
MEDIA_ROOT = tempfile.mkdtemp('combo-test')
HAYSTACK_CONNECTIONS['default']['PATH'] = os.path.join(
tempfile.mkdtemp('combo-test-whoosh'))
if 'DISABLE_MIGRATIONS' in os.environ:
class DisableMigrations(object):
def __contains__(self, item):

View File

@ -6,17 +6,16 @@ import shutil
import mock
from django.conf import settings
from django.contrib.auth.models import AnonymousUser, User, Group
from django.test import override_settings
from django.test.client import RequestFactory
from django.core.management import call_command
from django.core.urlresolvers import reverse
from haystack.exceptions import SkipDocument
from combo.apps.search.engines import engines
from combo.apps.search.models import SearchCell
from combo.apps.search.models import SearchCell, IndexedCell
from combo.apps.search.utils import index_site, search_site
from combo.data.models import Page, JsonCell, TextCell, MenuCell, LinkCell
from combo.data.search_indexes import PageIndex
from .test_manager import login
@ -229,9 +228,9 @@ def test_search_contents():
page = Page(title='example page', slug='example-page')
page.save()
# no indexation of private cells (is_visible check)
# private cells are indexed
cell = TextCell(page=page, text='foobar', public=False, order=0)
assert cell.render_for_search() == ''
assert cell.render_for_search().strip() == 'foobar'
# no indexation of empty cells (is_relevant check)
cell = TextCell(page=page, text='', order=0)
@ -247,25 +246,20 @@ def test_search_contents():
def test_search_contents_index():
page = Page(title='example page', slug='example-page')
page.public = True
page.save()
page_index = PageIndex()
assert page_index.get_model() is Page
assert page_index.prepare_url(page) == '/example-page/'
page_index.prepare(page)
page.public = False
with pytest.raises(SkipDocument):
page_index.prepare(page)
page.public = True
cell = TextCell(page=page, text='<p>foobar</p>', order=0)
cell.save()
prepared_data = page_index.prepare(page)
assert 'foobar' in prepared_data['text']
request = RequestFactory().get('/')
request.user = AnonymousUser()
hits = search_site(request, 'foobar')
assert len(hits) == 0
index_site()
hits = search_site(request, 'foobar')
assert len(hits) == 1
def test_search_contents_technical_placeholder():
page = Page(title='example page', slug='example-page')
@ -274,10 +268,14 @@ def test_search_contents_technical_placeholder():
TextCell(page=page, text='<p>foobar</p>', order=0, placeholder='_off').save()
TextCell(page=page, text='<p>barfoo</p>', order=0, placeholder='on').save()
page_index = PageIndex()
prepared_data = page_index.prepare(page)
assert 'barfoo' in prepared_data['text']
assert not 'foobar' in prepared_data['text']
request = RequestFactory().get('/')
request.user = AnonymousUser()
index_site()
hits = search_site(request, 'foobar')
assert len(hits) == 0
hits = search_site(request, 'barfoo')
assert len(hits) == 1
def test_search_api(app):
page = Page(title='example page', slug='example-page')
@ -291,70 +289,61 @@ def test_search_api(app):
cell = TextCell(page=second_page, text='<p>other baz</p>', order=0)
cell.save()
page_index = PageIndex()
page_index.reindex()
resp = app.get('/api/search/?q=foobar', status=404)
index_site()
cell = SearchCell(page=page, _search_services={'data': ['_text']}, order=0)
cell.save()
resp = app.get('/api/search/?q=foobar', status=200)
assert len(resp.json['data']) == 1
assert resp.json['data'][0]['text'] == 'example page'
resp = app.get('/ajax/search/%s/_text/?q=foobar' % cell.id, status=200)
assert resp.text.count('<li') == 1
assert 'example page' in resp.text
resp = app.get('/api/search/?q=other', status=200)
assert len(resp.json['data']) == 1
assert resp.json['data'][0]['text'] == 'second page'
resp = app.get('/ajax/search/%s/_text/?q=other' % cell.id, status=200)
assert resp.text.count('<li') == 1
assert 'second page' in resp.text
resp = app.get('/api/search/?q=baz', status=200)
assert len(resp.json['data']) == 2
resp = app.get('/ajax/search/%s/_text/?q=baz' % cell.id, status=200)
assert resp.text.count('<li') == 2
resp = app.get('/api/search/?q=quux', status=200)
assert len(resp.json['data']) == 0
resp = app.get('/ajax/search/%s/_text/?q=quux' % cell.id, status=200)
assert resp.text.count('<li') == 0
def test_update_index_command(app):
call_command('clear_index', interactive=False)
call_command('update_index') # empty site
def test_search_external_links(app):
page = Page(title='example page', slug='example-page')
page.save()
cell = SearchCell(page=page, _search_services={'data': ['_text']}, order=0)
cell.save()
call_command('update_index')
resp = app.get('/api/search/?q=foobar', status=200)
assert len(resp.json['data']) == 0
index_site()
request = RequestFactory().get('/')
request.user = AnonymousUser()
hits = search_site(request, 'foobar')
assert len(hits) == 0
LinkCell(title='foobar', url='http://example.net', page=page, order=0).save()
call_command('update_index')
index_site()
resp = app.get('/api/search/?q=foobar', status=200)
assert len(resp.json['data']) == 1
assert resp.json['data'][0]['text'] == 'foobar'
assert resp.json['data'][0]['description'] is None
assert resp.json['data'][0]['url'] == 'http://example.net'
hits = search_site(request, 'foobar')
assert len(hits) == 1
assert hits[0]['text'] == 'foobar'
assert hits[0]['url'] == 'http://example.net'
# second link with same target
LinkCell(title='baz', url='http://example.net', page=page, order=0).save()
call_command('update_index')
resp = app.get('/api/search/?q=baz', status=200)
assert len(resp.json['data']) == 1
assert resp.json['data'][0]['url'] == 'http://example.net'
index_site()
# add a second link with the same target
LinkCell(title='bar', url='http://example.net', page=page, order=0).save()
call_command('update_index')
hits = search_site(request, 'baz')
assert len(hits) == 1
assert hits[0]['text'] in ('foobar', 'baz')
assert hits[0]['url'] == 'http://example.net'
hits = search_site(request, 'foobar')
assert len(hits) == 1
assert hits[0]['text'] in ('foobar', 'baz')
assert hits[0]['url'] == 'http://example.net'
resp = app.get('/api/search/?q=baz', status=200)
assert len(resp.json['data']) == 1
assert resp.json['data'][0]['url'] == 'http://example.net'
resp = app.get('/api/search/?q=bar', status=200)
assert len(resp.json['data']) == 1
assert resp.json['data'][0]['url'] == 'http://example.net'
def test_manager_search_cell(app, admin_user):
Page.objects.all().delete()
@ -399,9 +388,6 @@ def test_manager_search_cell(app, admin_user):
def test_manager_waiting_index_message(app, admin_user):
from haystack import connections
shutil.rmtree(connections['default'].get_backend().path)
Page.objects.all().delete()
page = Page(title='One', slug='one', template_name='standard')
page.save()
@ -417,8 +403,7 @@ def test_manager_waiting_index_message(app, admin_user):
resp = resp.form.submit().follow()
assert 'Content indexing has been scheduled' in resp.text
os.mkdir(connections['default'].get_backend().path)
call_command('update_index')
index_site()
resp = app.get('/manage/pages/%s/' % page.id)
assert 'Content indexing has been scheduled' not in resp.text
@ -455,3 +440,73 @@ def test_profile_search_engines(app):
page.save()
search_engines = engines.get_engines()
assert 'users' in search_engines.keys()
def test_private_search(app):
    """Non-public cells are only found by logged-in users."""
    page = Page(title='example page', slug='example-page')
    page.save()
    # one private cell ('foobar') and one public cell ('barfoo')
    TextCell(page=page, text='<p>foobar</p>', order=0, public=False).save()
    TextCell(page=page, text='<p>barfoo</p>', order=0, public=True).save()

    request = RequestFactory().get('/')
    request.user = AnonymousUser()
    index_site()
    # anonymous user: only the public cell is found
    hits = search_site(request, 'foobar')
    assert len(hits) == 0
    hits = search_site(request, 'barfoo')
    assert len(hits) == 1

    # logged-in user: both cells are found
    request.user = User.objects.create_user(username='normal-user')
    hits = search_site(request, 'foobar')
    assert len(hits) == 1
    hits = search_site(request, 'barfoo')
    assert len(hits) == 1
def test_restricted_search(app):
    """Group restrictions on cells and pages are applied to search results."""
    group = Group(name='plop')
    group.save()

    page = Page(title='example page', slug='example-page')
    page.save()
    # 'foobar' cell is restricted to the group, 'barfoo' cell has no group
    cell = TextCell(page=page, text='<p>foobar</p>', order=0, public=False)
    cell.save()
    cell.groups.set([group])
    TextCell(page=page, text='<p>barfoo</p>', order=0, public=False).save()
    index_site()

    # first cell is restricted, it's not found
    request = RequestFactory().get('/')
    request.user = User.objects.create_user(username='normal-user')
    hits = search_site(request, 'foobar')
    assert len(hits) == 0
    hits = search_site(request, 'barfoo')
    assert len(hits) == 1

    # access rules are copied at indexing time, hence the new index_site()
    page.groups.set([group])
    index_site()

    # page is restricted, no cell is found
    hits = search_site(request, 'foobar')
    assert len(hits) == 0
    hits = search_site(request, 'barfoo')
    assert len(hits) == 0

    # user is in group, gets a result
    request.user.groups.set([group])
    hits = search_site(request, 'foobar')
    assert len(hits) == 1
    hits = search_site(request, 'barfoo')
    assert len(hits) == 1

    # cell is excluded from group view
    cell.restricted_to_unlogged = True
    cell.save()
    index_site()
    hits = search_site(request, 'foobar')
    assert len(hits) == 0
    hits = search_site(request, 'barfoo')
    assert len(hits) == 1