general: redo full text search using querysets (#33632)
This commit is contained in:
parent
06417b1ff9
commit
7698d8a398
|
@ -15,6 +15,7 @@
|
|||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import django.apps
|
||||
from django.core.urlresolvers import reverse
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from .engines import engines
|
||||
|
@ -28,4 +29,22 @@ class AppConfig(django.apps.AppConfig):
|
|||
from . import urls
|
||||
return urls.urlpatterns
|
||||
|
||||
def hourly(self):
    """Rebuild the site search index.

    Hook named for hourly execution (its caller is not visible here).
    """
    # imported at call time rather than at module load
    from . import utils

    utils.index_site()
|
||||
|
||||
def ready(self):
    """Register the built-in search engine for page contents."""
    # the bound method is handed over as-is; the registry decides when to
    # call it to obtain the engine definitions
    provider = self.get_search_engines
    engines.register(provider)
|
||||
|
||||
def get_search_engines(self):
    """Return the search engines provided by this application.

    Returns:
        dict: a single '_text' entry, an internal engine described by the
        function to call ('function') and its display label ('label').
    """
    from . import utils

    page_contents_engine = dict(
        function=utils.search_site,
        label=_('Page Contents'),
    )
    return {'_text': page_contents_engine}
|
||||
|
||||
|
||||
default_app_config = 'combo.apps.search.AppConfig'
|
||||
|
|
|
@ -1,78 +0,0 @@
|
|||
# combo - content management system
|
||||
# Copyright (C) 2017 Entr'ouvert
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify it
|
||||
# under the terms of the GNU Affero General Public License as published
|
||||
# by the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from django.utils.timezone import now
|
||||
|
||||
from haystack.management.commands.update_index import Command as UpdateIndexCommand
|
||||
|
||||
from combo.data.models import Page, ExternalLinkSearchItem
|
||||
from combo.apps.search.models import SearchCell
|
||||
|
||||
|
||||
class Command(UpdateIndexCommand):
    """update_index variant that gathers external links before indexing."""

    def add_arguments(self, parser):
        """Add the --skip-external-links-collection switch to the parent options."""
        super(Command, self).add_arguments(parser)
        parser.add_argument(
            '--skip-external-links-collection',
            dest='skip_external_links_collection',
            action='store_true',
            default=False)

    def handle(self, **options):
        """Run the indexing, unless no search cell uses the '_text' service."""
        has_text_search_cell = any(SearchCell.get_cells_by_search_service('_text'))
        if not has_text_search_cell:
            # do not index site if there's no matching search cell
            return
        skip_collection = options.get('skip_external_links_collection', False)
        if not skip_collection:
            self.collect_external_links(options)
        return super(Command, self).handle(**options)

    def collect_external_links(self, options):
        """Store the external links of visible cells as ExternalLinkSearchItem rows.

        Links are grouped by URL; rows not refreshed by this run are removed
        at the end.
        """
        started_at = now()

        if options.get('remove'):
            ExternalLinkSearchItem.objects.all().delete()

        # assemble external links data, keyed by URL
        collected = {}
        for page in Page.objects.filter(sub_slug=''):
            if not page.is_visible(user=None):
                continue
            for cell in page.get_cells():
                if not cell.is_visible(user=None):
                    continue
                for link in cell.get_external_links_data():
                    url = link['url']
                    if url in collected:
                        # known link: its title is only kept as extra text
                        entry = collected[url]
                        entry['all_texts'].append(link['title'])
                    else:
                        # first occurrence: it provides the title
                        entry = {}
                        collected[url] = entry
                        entry['title'] = link['title']
                        entry['all_texts'] = []
                    # additional texts will be assembled and indexed
                    entry['all_texts'].append(link.get('text') or '')

        # save data as ExternalLinkSearchItem objects
        for url, entry in collected.items():
            item, _created = ExternalLinkSearchItem.objects.get_or_create(
                url=url,
                defaults={'title': entry['title']})
            item.title = entry['title']
            item.text = '\n'.join(entry['all_texts'])
            item.save()

        # remove obsolete objects (not saved since this run started)
        stale_items = ExternalLinkSearchItem.objects.filter(
            last_update_timestamp__lt=started_at)
        stale_items.delete()
|
|
@ -0,0 +1,35 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Generated by Django 1.11.17 on 2020-01-20 15:30
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    # Creates the IndexedCell table backing the queryset-based text search.

    dependencies = [
        ('data', '0043_delete_externallinksearchitem'),
        ('auth', '0008_alter_user_username_max_length'),
        ('contenttypes', '0002_remove_content_type_name'),
        ('search', '0005_searchcell_autofocus'),
    ]

    operations = [
        migrations.CreateModel(
            name='IndexedCell',
            fields=[
                (
                    'id',
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                (
                    'cell_pk',
                    models.PositiveIntegerField(null=True),
                ),
                (
                    'url',
                    models.CharField(blank=True, max_length=500, null=True),
                ),
                (
                    'title',
                    models.CharField(blank=True, max_length=500, null=True),
                ),
                (
                    'indexed_text',
                    models.TextField(blank=True, null=True),
                ),
                (
                    'public_access',
                    models.BooleanField(default=False),
                ),
                (
                    'last_update_timestamp',
                    models.DateTimeField(auto_now=True),
                ),
                (
                    'cell_type',
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to='contenttypes.ContentType',
                    ),
                ),
                (
                    'excluded_groups',
                    models.ManyToManyField(
                        blank=True,
                        related_name='_indexedcell_excluded_groups_+',
                        to='auth.Group',
                    ),
                ),
                (
                    'page',
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        to='data.Page',
                    ),
                ),
                (
                    'restricted_groups',
                    models.ManyToManyField(
                        blank=True,
                        related_name='_indexedcell_restricted_groups_+',
                        to='auth.Group',
                    ),
                ),
            ],
        ),
    ]
|
|
@ -16,21 +16,21 @@
|
|||
|
||||
import os
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import Group
|
||||
from django.contrib.contenttypes import fields
|
||||
from django.contrib.contenttypes.models import ContentType
|
||||
from django.db import models
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
from django import template
|
||||
from django.http import HttpResponse
|
||||
from django.core.exceptions import PermissionDenied
|
||||
from django.core.urlresolvers import reverse
|
||||
from django.utils.http import quote
|
||||
from django.template import RequestContext, Template
|
||||
|
||||
from jsonfield import JSONField
|
||||
from haystack import connections
|
||||
|
||||
from combo.utils import requests
|
||||
from combo.data.models import CellBase
|
||||
from combo.data.models import CellBase, Page
|
||||
from combo.data.library import register_cell_class
|
||||
from combo.utils import get_templated_url
|
||||
|
||||
|
@ -69,7 +69,7 @@ class SearchCell(CellBase):
|
|||
services = []
|
||||
for service_slug in self._search_services.get('data') or []:
|
||||
service = engines.get(service_slug)
|
||||
if service and service.get('url'):
|
||||
if service and (service.get('url') or service.get('function')):
|
||||
service['slug'] = service_slug
|
||||
services.append(service)
|
||||
return services
|
||||
|
@ -141,30 +141,33 @@ class SearchCell(CellBase):
|
|||
if not query:
|
||||
return render_response(service)
|
||||
|
||||
url = get_templated_url(service['url'],
|
||||
context={'request': request, 'q': query, 'search_service': service})
|
||||
url = url % {'q': quote(query.encode('utf-8'))} # if url contains %(q)s
|
||||
if url.startswith('/'):
|
||||
url = request.build_absolute_uri(url)
|
||||
if service.get('function'): # internal search engine
|
||||
results = {'data': service['function'](request, query)}
|
||||
else:
|
||||
url = get_templated_url(service['url'],
|
||||
context={'request': request, 'q': query, 'search_service': service})
|
||||
url = url % {'q': quote(query.encode('utf-8'))} # if url contains %(q)s
|
||||
if url.startswith('/'):
|
||||
url = request.build_absolute_uri(url)
|
||||
|
||||
if not url:
|
||||
return render_response(service)
|
||||
if not url:
|
||||
return render_response(service)
|
||||
|
||||
kwargs = {}
|
||||
kwargs['cache_duration'] = service.get('cache_duration', 0)
|
||||
kwargs['remote_service'] = 'auto' if service.get('signature') else None
|
||||
# don't automatically add user info to query string, if required it can
|
||||
# be set explicitly in the URL template in the engine definition (via
|
||||
# {{user_nameid}} or {{user_email}}).
|
||||
kwargs['without_user'] = True
|
||||
# don't send error traces on HTTP errors
|
||||
kwargs['log_errors'] = 'warn'
|
||||
kwargs = {}
|
||||
kwargs['cache_duration'] = service.get('cache_duration', 0)
|
||||
kwargs['remote_service'] = 'auto' if service.get('signature') else None
|
||||
# don't automatically add user info to query string, if required it can
|
||||
# be set explicitly in the URL template in the engine definition (via
|
||||
# {{user_nameid}} or {{user_email}}).
|
||||
kwargs['without_user'] = True
|
||||
# don't send error traces on HTTP errors
|
||||
kwargs['log_errors'] = 'warn'
|
||||
|
||||
response = requests.get(url, **kwargs)
|
||||
try:
|
||||
results = response.json()
|
||||
except ValueError:
|
||||
return render_response(service)
|
||||
response = requests.get(url, **kwargs)
|
||||
try:
|
||||
results = response.json()
|
||||
except ValueError:
|
||||
return render_response(service)
|
||||
|
||||
if service.get('data_key'):
|
||||
results['data'] = results.get(service['data_key']) or []
|
||||
|
@ -179,10 +182,25 @@ class SearchCell(CellBase):
|
|||
for hit in results.get('data') or []:
|
||||
for k, v in hit_templates.items():
|
||||
hit[k] = v.render(RequestContext(request, hit))
|
||||
|
||||
return render_response(service, results)
|
||||
|
||||
def has_text_search_service(self):
    """Tell whether the built-in '_text' engine is among the configured services.

    Returns:
        bool: True when '_text' is listed under the 'data' key of
        ``self._search_services``; False when it is not listed, or when the
        key is missing or holds None.
    """
    # A .get() default only covers a missing key; ``or []`` also covers an
    # explicit None, which would otherwise make the ``in`` test raise.
    configured_slugs = self._search_services.get('data') or []
    return '_text' in configured_slugs
|
||||
|
||||
def missing_index(self):
    """Tell whether the site contents have not been indexed yet.

    Returns:
        bool: True when no IndexedCell row exists.
    """
    # The former check on the haystack index directory returned first and
    # left the database check unreachable; only the database check is kept.
    # exists() lets the database stop at the first row instead of counting
    # the whole table.
    return not IndexedCell.objects.exists()
|
||||
|
||||
|
||||
class IndexedCell(models.Model):
    """Searchable text of a cell, or of an external link found in a cell."""

    # generic reference to the cell the text comes from
    cell_type = models.ForeignKey(
        ContentType,
        on_delete=models.CASCADE)
    cell_pk = models.PositiveIntegerField(
        null=True)
    cell = fields.GenericForeignKey(
        'cell_type',
        'cell_pk')

    # what a search hit exposes: page (optional), target URL and title
    page = models.ForeignKey(
        Page,
        on_delete=models.CASCADE,
        blank=True,
        null=True)
    url = models.CharField(
        max_length=500,
        blank=True,
        null=True)
    title = models.CharField(
        max_length=500,
        blank=True,
        null=True)

    # text matched against search queries
    indexed_text = models.TextField(
        blank=True,
        null=True)

    # access rights used to filter search results
    public_access = models.BooleanField(
        default=False)
    restricted_groups = models.ManyToManyField(
        Group,
        blank=True,
        related_name='+')
    excluded_groups = models.ManyToManyField(
        Group,
        blank=True,
        related_name='+')

    # refreshed on every save (auto_now)
    last_update_timestamp = models.DateTimeField(
        auto_now=True)
|
||||
|
|
|
@ -0,0 +1,111 @@
|
|||
# combo - content management system
|
||||
# Copyright (C) 2014-2020 Entr'ouvert
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify it
|
||||
# under the terms of the GNU Affero General Public License as published
|
||||
# by the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.contenttypes.models import ContentType
|
||||
from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector
|
||||
from combo.data.models import CellBase
|
||||
from django.db import connection
|
||||
from django.db.models import Q
|
||||
from django.db.transaction import atomic
|
||||
|
||||
from .models import IndexedCell
|
||||
|
||||
|
||||
def set_cell_access(indexed_cell, cell):
    """Copy the access rules of `cell` and its page onto `indexed_cell`, then save.

    Args:
        indexed_cell: index entry to update; it must already be saved, as
            its group relations are modified.
        cell: source cell; its page, `public`, `groups` and
            `restricted_to_unlogged` attributes are read.
    """
    page = cell.page
    is_public = bool(page.public and cell.public)
    indexed_cell.public_access = is_public

    # start from empty group sets, whatever was stored before
    indexed_cell.excluded_groups.clear()
    indexed_cell.restricted_groups.clear()

    if not is_public:
        indexed_cell.restricted_groups.set(cell.groups.all())
        page_groups = page.groups.all()
        if cell.restricted_to_unlogged:
            indexed_cell.excluded_groups.set(page_groups)
        else:
            # page groups come in addition to the groups of the cell
            for page_group in page_groups:
                indexed_cell.restricted_groups.add(page_group)

    indexed_cell.save()
|
||||
|
||||
|
||||
@atomic
def index_site():
    """Rebuild the whole search index from the current cells.

    Every IndexedCell row is deleted and recreated; the @atomic decorator
    runs the whole rebuild in a single transaction.

    For each cell outside snapshots and technical placeholders (placeholder
    names starting with '_'):

    * its search rendering, when not empty, is stored in an entry pointing
      to its page;
    * each external link it exposes is stored in an entry keyed by the link
      URL; further occurrences of the same URL only add their title and
      text to that entry.
    """
    IndexedCell.objects.all().delete()
    # link URL -> IndexedCell already created for that URL during this run
    external_urls = {}
    for klass in CellBase.get_cell_classes():
        cells = klass.objects.filter(page__snapshot__isnull=True).exclude(
            placeholder__startswith='_')
        for cell in cells:
            cell_type = ContentType.objects.get_for_model(cell)
            try:
                rendered_text = cell.render_for_search()
            except Exception:  # ignore rendering error
                # deliberate best effort: a cell that cannot be rendered is
                # left out of the index, external links included
                continue
            if rendered_text:
                page_entry = IndexedCell(cell_type=cell_type, cell_pk=cell.id)
                page_entry.indexed_text = rendered_text
                page_entry.page_id = cell.page_id
                page_entry.url = cell.page.get_online_url()
                page_entry.title = cell.page.title
                # group relations can only be set on a saved object
                page_entry.save()
                set_cell_access(page_entry, cell)

            # index external links
            for link_data in cell.get_external_links_data():
                link_url = link_data['url']
                # look the link up by its own URL; using the URL of the
                # previously handled entry would miss existing entries and
                # could append the text to an unrelated link
                link_entry = external_urls.get(link_url)
                if link_entry is None:
                    # create an entry for that link.
                    link_entry = IndexedCell(cell_type=cell_type, cell_pk=cell.id)
                    link_entry.save()
                    set_cell_access(link_entry, cell)
                    link_entry.url = link_url
                    link_entry.title = link_data['title']
                    link_entry.indexed_text = link_data.get('text') or ''
                    external_urls[link_url] = link_entry
                else:
                    # if that link already exists, add detailed texts
                    # NOTE(review): the entry keeps the access rights of the
                    # first cell that exposed the link -- confirm this is
                    # the expected policy.
                    link_entry.indexed_text += ' ' + link_data['title']
                    # parenthesised: `+` binds tighter than `or`, so without
                    # them a missing text would be added to a str and raise
                    link_entry.indexed_text += ' ' + (link_data.get('text') or '')
                link_entry.save()
|
||||
|
||||
|
||||
def search_site(request, query):
    """Search the indexed cells for `query`, honouring access rights.

    Args:
        request: current request; only ``request.user`` is read.
        query: text to look for.

    Returns:
        list: at most 10 dicts with keys 'text' (title of the hit), 'rank'
        (None when the backend computes no rank) and 'url'; a given URL
        appears only once.
    """
    if connection.vendor == 'postgresql':
        config = settings.POSTGRESQL_FTS_SEARCH_CONFIG
        vector = (SearchVector('title', config=config, weight='A')
                  + SearchVector('indexed_text', config=config, weight='A'))
        # parse the query with the same configuration as the vectors;
        # otherwise the database default applies to the query and its
        # stemming may not match the one used on the indexed text
        search_query = SearchQuery(query, config=config)
        qs = IndexedCell.objects.annotate(rank=SearchRank(vector, search_query))
        # drop weak matches, best ones first
        qs = qs.filter(rank__gte=0.3).order_by('-rank')
    else:
        # other databases: plain case-insensitive substring match, no rank
        qs = IndexedCell.objects.filter(
            Q(indexed_text__icontains=query) | Q(title__icontains=query))

    if request.user.is_anonymous:
        qs = qs.exclude(public_access=False)
    else:
        user_groups = request.user.groups.all()
        # keep entries without group restriction or restricted to one of
        # the user's groups, then drop those excluding one of them
        qs = qs.filter(
            Q(restricted_groups=None) |
            Q(restricted_groups__in=user_groups))
        qs = qs.exclude(excluded_groups__in=user_groups)

    hits = []
    seen_urls = set()
    for hit in qs:
        # the same URL can be returned several times (several entries, or
        # rows multiplied by the group joins); only the first one is kept
        if hit.url in seen_urls:
            continue
        seen_urls.add(hit.url)
        hits.append({
            'text': hit.title,
            'rank': getattr(hit, 'rank', None),
            'url': hit.url,
        })
        if len(hits) == 10:
            break

    return hits
|
|
@ -15,23 +15,8 @@
|
|||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from django.apps import AppConfig
|
||||
from django.core.urlresolvers import reverse
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
|
||||
class DataConfig(AppConfig):
    """Application configuration of combo.data."""

    name = 'combo.data'
    verbose_name = 'data'

    def ready(self):
        """Register the built-in search engine for page contents."""
        from combo.apps.search import engines

        provider = self.get_search_engines
        engines.register(provider)

    def get_search_engines(self):
        """Return the '_text' engine, served by the 'api-search' URL."""
        # %(q)s is left in the URL as the placeholder for the query
        search_url = reverse('api-search') + '?q=%(q)s'
        page_contents_engine = dict(
            url=search_url,
            label=_('Page Contents'),
        )
        return {'_text': page_contents_engine}
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Generated by Django 1.11.17 on 2020-01-20 15:30
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    # Drops the ExternalLinkSearchItem table.

    dependencies = [
        (
            'data',
            '0042_page_creation_timestamp',
        ),
    ]

    operations = [
        migrations.DeleteModel(name='ExternalLinkSearchItem'),
    ]
|
|
@ -755,10 +755,6 @@ class CellBase(six.with_metaclass(CellMeta, models.Model)):
|
|||
return ''
|
||||
if self.user_dependant:
|
||||
return ''
|
||||
if not self.page.is_visible(user=None):
|
||||
return ''
|
||||
if not self.is_visible(user=None):
|
||||
return ''
|
||||
request = RequestFactory().get(self.page.get_online_url())
|
||||
request.user = None # compat
|
||||
context = {
|
||||
|
@ -1474,18 +1470,6 @@ class ConfigJsonCell(JsonCellBase):
|
|||
return context
|
||||
|
||||
|
||||
class ExternalLinkSearchItem(models.Model):
    # Link to an external site.
    #
    # Those are automatically collected by the "update_index" command, which
    # calls get_external_links_data on all available cells, to be used by
    # the general search engine.
    title = models.CharField(
        _('Title'),
        max_length=150)
    text = models.TextField(
        blank=True)
    url = models.CharField(
        _('URL'),
        max_length=200,
        blank=True)
    # refreshed on every save (auto_now)
    last_update_timestamp = models.DateTimeField(
        auto_now=True)
|
||||
|
||||
|
||||
@receiver(pre_save, sender=Page)
|
||||
def create_redirects(sender, instance, raw, **kwargs):
|
||||
if raw or not instance.id or instance.snapshot_id:
|
||||
|
|
|
@ -1,46 +0,0 @@
|
|||
# combo - content management system
|
||||
# Copyright (C) 2014-2017 Entr'ouvert
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify it
|
||||
# under the terms of the GNU Affero General Public License as published
|
||||
# by the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from haystack import indexes
|
||||
from haystack.exceptions import SkipDocument
|
||||
|
||||
from .models import Page, CellBase, ExternalLinkSearchItem
|
||||
|
||||
class PageIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack index of pages; the document text comes from a template."""

    title = indexes.CharField(
        model_attr='title',
        boost=1.5)
    text = indexes.CharField(
        document=True,
        use_template=True,
        template_name='combo/search/page.txt')
    url = indexes.CharField(
        indexed=False)

    def get_model(self):
        """Return the indexed model class."""
        return Page

    def prepare_url(self, obj):
        """Return the value stored in the `url` field for `obj`."""
        online_url = obj.get_online_url()
        return online_url

    def prepare(self, obj):
        """Prepare `obj` for indexing.

        Raises:
            SkipDocument: when the page is not visible with user=None.
        """
        visible = obj.is_visible(user=None)
        if visible:
            return super(PageIndex, self).prepare(obj)
        raise SkipDocument()
|
||||
|
||||
|
||||
class ExternalLinkSearchIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack index of external links; all fields are read from the model."""

    title = indexes.CharField(
        model_attr='title',
        boost=1.5)
    text = indexes.CharField(
        model_attr='text',
        document=True)
    url = indexes.CharField(
        model_attr='url',
        indexed=False)

    def get_model(self):
        """Return the indexed model class."""
        return ExternalLinkSearchItem
|
|
@ -1,7 +0,0 @@
|
|||
{% autoescape off %}
|
||||
{% for cell in object.get_cells %}
|
||||
{% if cell.placeholder|first != '_' %} {# ignore technical placeholders #}
|
||||
{{ cell.render_for_search }}
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
{% endautoescape %}
|
|
@ -21,7 +21,6 @@ from . import views
|
|||
|
||||
urlpatterns = [
|
||||
url(r'^api/menu-badges/$', views.menu_badges),
|
||||
url(r'^api/search/$', views.api_search, name='api-search'),
|
||||
url(r'^ajax/cell/(?P<page_pk>\w+)/(?P<cell_reference>[\w_-]+)/$',
|
||||
views.ajax_page_cell, name='combo-public-ajax-page-cell'),
|
||||
url(r'^snapshot/(?P<pk>\w+)/$', manager_required(views.snapshot), name='combo-snapshot-view'),
|
||||
|
|
|
@ -40,9 +40,6 @@ from django.views.decorators.csrf import csrf_exempt
|
|||
from django.utils.translation import ugettext as _
|
||||
from django.forms.widgets import Media
|
||||
|
||||
from haystack.inputs import AutoQuery
|
||||
from haystack.query import SearchQuerySet, SQ
|
||||
|
||||
if 'mellon' in settings.INSTALLED_APPS:
|
||||
from mellon.utils import get_idps
|
||||
else:
|
||||
|
@ -577,31 +574,6 @@ def menu_badges(request):
|
|||
menu_badges.mellon_no_passive = True
|
||||
|
||||
|
||||
def api_search(request):
    """Search page contents and return the hits as JSON.

    The query is read from the `q` GET parameter. The response body is
    ``{"data": [...]}``, each hit having 'text', 'url' and 'description'
    keys.

    Raises:
        Http404: when no search cell using the '_text' service is visible
            to the user.
    """
    text_search_cells = SearchCell.get_cells_by_search_service('_text')
    # any() stops at the first visible cell
    if not any(cell.is_visible(request.user) for cell in text_search_cells):
        raise Http404()

    query = request.GET.get('q') or ''
    content_match = SQ(content=AutoQuery(query))
    title_match = SQ(title=AutoQuery(query))
    sqs = SearchQuerySet().filter(content_match | title_match)
    sqs = sqs.highlight()
    # NOTE(review): the return value is discarded -- confirm whether
    # load_all() acts in place.
    sqs.load_all()

    hits = []
    for hit in sqs:
        # only page hits with a highlighted excerpt get a description
        if hit.model_name == 'page' and hit.highlighted['text']:
            description = '<p>%s</p>' % hit.highlighted['text'][0]
        else:
            description = None
        hits.append({
            'text': hit.title,
            'url': hit.url,
            'description': description,
        })

    payload = json.dumps({'data': hits})
    return HttpResponse(payload, content_type='application/json')
|
||||
|
||||
|
||||
def snapshot(request, *args, **kwargs):
    """Publish the page stored in the snapshot whose id is ``kwargs['pk']``."""
    page_snapshot = PageSnapshot.objects.get(id=kwargs['pk'])
    snapshot_page = page_snapshot.get_page()
    return publish_page(request, snapshot_page)
|
||||
|
|
|
@ -77,7 +77,6 @@ INSTALLED_APPS = (
|
|||
'combo.apps.pwa',
|
||||
'combo.apps.gallery',
|
||||
'combo.apps.kb',
|
||||
'haystack',
|
||||
'xstatic.pkg.josefinsans',
|
||||
'xstatic.pkg.leaflet',
|
||||
'xstatic.pkg.opensans',
|
||||
|
@ -189,13 +188,6 @@ CKEDITOR_CONFIGS = {
|
|||
CKEDITOR_CONFIGS['small'] = copy.copy(CKEDITOR_CONFIGS['default'])
|
||||
CKEDITOR_CONFIGS['small']['height'] = 150
|
||||
|
||||
HAYSTACK_CONNECTIONS = {
|
||||
'default': {
|
||||
'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine',
|
||||
'PATH': os.path.join(BASE_DIR, 'whoosh_index'),
|
||||
},
|
||||
}
|
||||
|
||||
# from sorl.thumbnail -- https://sorl-thumbnail.readthedocs.io/en/latest/reference/settings.html
|
||||
THUMBNAIL_PRESERVE_FORMAT = True
|
||||
THUMBNAIL_FORCE_OVERWRITE = False
|
||||
|
@ -264,6 +256,7 @@ MELLON_IDENTITY_PROVIDERS = []
|
|||
|
||||
# search services
|
||||
COMBO_SEARCH_SERVICES = {}
|
||||
POSTGRESQL_FTS_SEARCH_CONFIG = 'french'
|
||||
|
||||
# mapping of payment modes
|
||||
LINGO_NO_ONLINE_PAYMENT_REASONS = {}
|
||||
|
|
|
@ -2,5 +2,3 @@
|
|||
|
||||
/sbin/runuser -u combo /usr/bin/combo-manage -- tenant_command cron --all-tenants
|
||||
/sbin/runuser -u combo /usr/bin/combo-manage -- tenant_command clearsessions --all-tenants
|
||||
# update_index cannot be used due to some bug in haystack/whoosh (#30509)
|
||||
/sbin/runuser -u combo /usr/bin/combo-manage -- tenant_command rebuild_index --noinput --all-tenants -v0
|
||||
|
|
|
@ -21,14 +21,13 @@ Depends: ${misc:Depends}, ${python3:Depends},
|
|||
python3-xstatic-opensans,
|
||||
python3-xstatic-roboto-fontface (>= 0.5.0.0),
|
||||
python3-eopayment (>= 1.35),
|
||||
python3-django-haystack (>= 2.4.0),
|
||||
python3-django-ratelimit,
|
||||
python3-sorl-thumbnail,
|
||||
python3-pil,
|
||||
python3-pywebpush,
|
||||
python3-pygal,
|
||||
python3-lxml
|
||||
Recommends: python3-django-mellon, python3-whoosh
|
||||
Recommends: python3-django-mellon
|
||||
Conflicts: python-lingo
|
||||
Breaks: combo (<< 2.34.post2)
|
||||
Description: Portal Management System (Python module)
|
||||
|
|
|
@ -11,7 +11,5 @@ XStatic_roboto-fontface
|
|||
eopayment>=1.13
|
||||
python-dateutil
|
||||
djangorestframework>=3.3, <3.7
|
||||
django-haystack
|
||||
whoosh
|
||||
sorl-thumbnail
|
||||
pyproj
|
||||
|
|
2
setup.py
2
setup.py
|
@ -163,9 +163,7 @@ setup(
|
|||
'eopayment>=1.41',
|
||||
'python-dateutil',
|
||||
'djangorestframework>=3.3, <3.7',
|
||||
'django-haystack',
|
||||
'django-ratelimit<3',
|
||||
'whoosh',
|
||||
'sorl-thumbnail',
|
||||
'Pillow',
|
||||
'pyproj',
|
||||
|
|
|
@ -44,9 +44,6 @@ COMBO_DASHBOARD_ENABLED = True
|
|||
import tempfile
|
||||
MEDIA_ROOT = tempfile.mkdtemp('combo-test')
|
||||
|
||||
HAYSTACK_CONNECTIONS['default']['PATH'] = os.path.join(
|
||||
tempfile.mkdtemp('combo-test-whoosh'))
|
||||
|
||||
if 'DISABLE_MIGRATIONS' in os.environ:
|
||||
class DisableMigrations(object):
|
||||
def __contains__(self, item):
|
||||
|
|
|
@ -6,17 +6,16 @@ import shutil
|
|||
import mock
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import AnonymousUser, User, Group
|
||||
from django.test import override_settings
|
||||
from django.test.client import RequestFactory
|
||||
from django.core.management import call_command
|
||||
from django.core.urlresolvers import reverse
|
||||
|
||||
from haystack.exceptions import SkipDocument
|
||||
|
||||
from combo.apps.search.engines import engines
|
||||
from combo.apps.search.models import SearchCell
|
||||
from combo.apps.search.models import SearchCell, IndexedCell
|
||||
from combo.apps.search.utils import index_site, search_site
|
||||
from combo.data.models import Page, JsonCell, TextCell, MenuCell, LinkCell
|
||||
from combo.data.search_indexes import PageIndex
|
||||
|
||||
from .test_manager import login
|
||||
|
||||
|
@ -229,9 +228,9 @@ def test_search_contents():
|
|||
page = Page(title='example page', slug='example-page')
|
||||
page.save()
|
||||
|
||||
# no indexation of private cells (is_visible check)
|
||||
# private cells are indexed
|
||||
cell = TextCell(page=page, text='foobar', public=False, order=0)
|
||||
assert cell.render_for_search() == ''
|
||||
assert cell.render_for_search().strip() == 'foobar'
|
||||
|
||||
# no indexation of empty cells (is_relevant check)
|
||||
cell = TextCell(page=page, text='', order=0)
|
||||
|
@ -247,25 +246,20 @@ def test_search_contents():
|
|||
|
||||
def test_search_contents_index():
|
||||
page = Page(title='example page', slug='example-page')
|
||||
page.public = True
|
||||
page.save()
|
||||
|
||||
page_index = PageIndex()
|
||||
assert page_index.get_model() is Page
|
||||
|
||||
assert page_index.prepare_url(page) == '/example-page/'
|
||||
|
||||
page_index.prepare(page)
|
||||
|
||||
page.public = False
|
||||
with pytest.raises(SkipDocument):
|
||||
page_index.prepare(page)
|
||||
|
||||
page.public = True
|
||||
cell = TextCell(page=page, text='<p>foobar</p>', order=0)
|
||||
cell.save()
|
||||
|
||||
prepared_data = page_index.prepare(page)
|
||||
assert 'foobar' in prepared_data['text']
|
||||
request = RequestFactory().get('/')
|
||||
request.user = AnonymousUser()
|
||||
hits = search_site(request, 'foobar')
|
||||
assert len(hits) == 0
|
||||
index_site()
|
||||
hits = search_site(request, 'foobar')
|
||||
assert len(hits) == 1
|
||||
|
||||
|
||||
def test_search_contents_technical_placeholder():
|
||||
page = Page(title='example page', slug='example-page')
|
||||
|
@ -274,10 +268,14 @@ def test_search_contents_technical_placeholder():
|
|||
TextCell(page=page, text='<p>foobar</p>', order=0, placeholder='_off').save()
|
||||
TextCell(page=page, text='<p>barfoo</p>', order=0, placeholder='on').save()
|
||||
|
||||
page_index = PageIndex()
|
||||
prepared_data = page_index.prepare(page)
|
||||
assert 'barfoo' in prepared_data['text']
|
||||
assert not 'foobar' in prepared_data['text']
|
||||
request = RequestFactory().get('/')
|
||||
request.user = AnonymousUser()
|
||||
index_site()
|
||||
hits = search_site(request, 'foobar')
|
||||
assert len(hits) == 0
|
||||
hits = search_site(request, 'barfoo')
|
||||
assert len(hits) == 1
|
||||
|
||||
|
||||
def test_search_api(app):
|
||||
page = Page(title='example page', slug='example-page')
|
||||
|
@ -291,70 +289,61 @@ def test_search_api(app):
|
|||
|
||||
cell = TextCell(page=second_page, text='<p>other baz</p>', order=0)
|
||||
cell.save()
|
||||
|
||||
page_index = PageIndex()
|
||||
page_index.reindex()
|
||||
|
||||
resp = app.get('/api/search/?q=foobar', status=404)
|
||||
index_site()
|
||||
|
||||
cell = SearchCell(page=page, _search_services={'data': ['_text']}, order=0)
|
||||
cell.save()
|
||||
|
||||
resp = app.get('/api/search/?q=foobar', status=200)
|
||||
assert len(resp.json['data']) == 1
|
||||
assert resp.json['data'][0]['text'] == 'example page'
|
||||
resp = app.get('/ajax/search/%s/_text/?q=foobar' % cell.id, status=200)
|
||||
assert resp.text.count('<li') == 1
|
||||
assert 'example page' in resp.text
|
||||
|
||||
resp = app.get('/api/search/?q=other', status=200)
|
||||
assert len(resp.json['data']) == 1
|
||||
assert resp.json['data'][0]['text'] == 'second page'
|
||||
resp = app.get('/ajax/search/%s/_text/?q=other' % cell.id, status=200)
|
||||
assert resp.text.count('<li') == 1
|
||||
assert 'second page' in resp.text
|
||||
|
||||
resp = app.get('/api/search/?q=baz', status=200)
|
||||
assert len(resp.json['data']) == 2
|
||||
resp = app.get('/ajax/search/%s/_text/?q=baz' % cell.id, status=200)
|
||||
assert resp.text.count('<li') == 2
|
||||
|
||||
resp = app.get('/api/search/?q=quux', status=200)
|
||||
assert len(resp.json['data']) == 0
|
||||
resp = app.get('/ajax/search/%s/_text/?q=quux' % cell.id, status=200)
|
||||
assert resp.text.count('<li') == 0
|
||||
|
||||
def test_update_index_command(app):
    """Both index commands run without error on an empty site."""
    call_command(
        'clear_index',
        interactive=False)
    # empty site
    call_command('update_index')
|
||||
|
||||
def test_search_external_links(app):
|
||||
page = Page(title='example page', slug='example-page')
|
||||
page.save()
|
||||
|
||||
cell = SearchCell(page=page, _search_services={'data': ['_text']}, order=0)
|
||||
cell.save()
|
||||
|
||||
call_command('update_index')
|
||||
resp = app.get('/api/search/?q=foobar', status=200)
|
||||
assert len(resp.json['data']) == 0
|
||||
index_site()
|
||||
request = RequestFactory().get('/')
|
||||
request.user = AnonymousUser()
|
||||
hits = search_site(request, 'foobar')
|
||||
assert len(hits) == 0
|
||||
|
||||
LinkCell(title='foobar', url='http://example.net', page=page, order=0).save()
|
||||
call_command('update_index')
|
||||
index_site()
|
||||
|
||||
resp = app.get('/api/search/?q=foobar', status=200)
|
||||
assert len(resp.json['data']) == 1
|
||||
assert resp.json['data'][0]['text'] == 'foobar'
|
||||
assert resp.json['data'][0]['description'] is None
|
||||
assert resp.json['data'][0]['url'] == 'http://example.net'
|
||||
hits = search_site(request, 'foobar')
|
||||
assert len(hits) == 1
|
||||
assert hits[0]['text'] == 'foobar'
|
||||
assert hits[0]['url'] == 'http://example.net'
|
||||
|
||||
# second link with same target
|
||||
LinkCell(title='baz', url='http://example.net', page=page, order=0).save()
|
||||
call_command('update_index')
|
||||
|
||||
resp = app.get('/api/search/?q=baz', status=200)
|
||||
assert len(resp.json['data']) == 1
|
||||
assert resp.json['data'][0]['url'] == 'http://example.net'
|
||||
index_site()
|
||||
|
||||
# add a second link with the same target
|
||||
LinkCell(title='bar', url='http://example.net', page=page, order=0).save()
|
||||
call_command('update_index')
|
||||
hits = search_site(request, 'baz')
|
||||
assert len(hits) == 1
|
||||
assert hits[0]['text'] in ('foobar', 'baz')
|
||||
assert hits[0]['url'] == 'http://example.net'
|
||||
hits = search_site(request, 'foobar')
|
||||
assert len(hits) == 1
|
||||
assert hits[0]['text'] in ('foobar', 'baz')
|
||||
assert hits[0]['url'] == 'http://example.net'
|
||||
|
||||
resp = app.get('/api/search/?q=baz', status=200)
|
||||
assert len(resp.json['data']) == 1
|
||||
assert resp.json['data'][0]['url'] == 'http://example.net'
|
||||
|
||||
resp = app.get('/api/search/?q=bar', status=200)
|
||||
assert len(resp.json['data']) == 1
|
||||
assert resp.json['data'][0]['url'] == 'http://example.net'
|
||||
|
||||
def test_manager_search_cell(app, admin_user):
|
||||
Page.objects.all().delete()
|
||||
|
@ -399,9 +388,6 @@ def test_manager_search_cell(app, admin_user):
|
|||
|
||||
|
||||
def test_manager_waiting_index_message(app, admin_user):
|
||||
from haystack import connections
|
||||
shutil.rmtree(connections['default'].get_backend().path)
|
||||
|
||||
Page.objects.all().delete()
|
||||
page = Page(title='One', slug='one', template_name='standard')
|
||||
page.save()
|
||||
|
@ -417,8 +403,7 @@ def test_manager_waiting_index_message(app, admin_user):
|
|||
resp = resp.form.submit().follow()
|
||||
assert 'Content indexing has been scheduled' in resp.text
|
||||
|
||||
os.mkdir(connections['default'].get_backend().path)
|
||||
call_command('update_index')
|
||||
index_site()
|
||||
resp = app.get('/manage/pages/%s/' % page.id)
|
||||
assert 'Content indexing has been scheduled' not in resp.text
|
||||
|
||||
|
@ -455,3 +440,73 @@ def test_profile_search_engines(app):
|
|||
page.save()
|
||||
search_engines = engines.get_engines()
|
||||
assert 'users' in search_engines.keys()
|
||||
|
||||
|
||||
def test_private_search(app):
    """Check search results for public and non-public cells.

    The page holds one non-public cell ('foobar') and one public cell
    ('barfoo').  The test asserts that an anonymous user only gets a hit
    for the public cell, while a logged-in user gets a hit for both.
    """
    page = Page(title='example page', slug='example-page')
    page.save()

    # one non-public cell and one public cell, both on the same page
    for text, public in (('<p>foobar</p>', False), ('<p>barfoo</p>', True)):
        TextCell(page=page, text=text, order=0, public=public).save()

    request = RequestFactory().get('/')
    request.user = AnonymousUser()
    index_site()

    # anonymous user: only the public cell is expected to match
    assert len(search_site(request, 'foobar')) == 0
    assert len(search_site(request, 'barfoo')) == 1

    # logged-in user: both cells are expected to match
    request.user = User.objects.create_user(username='normal-user')
    for query in ('foobar', 'barfoo'):
        assert len(search_site(request, query)) == 1
|
||||
|
||||
|
||||
def test_restricted_search(app):
    """Check search results against group restrictions on cells and pages.

    Two non-public cells live on the same page: 'foobar' is limited to a
    group, 'barfoo' is not.  The scenario then restricts the page itself to
    the group, adds the user to the group, and finally flags the first cell
    as ``restricted_to_unlogged``, asserting the expected hit counts after
    each step.
    """
    group = Group(name='plop')
    group.save()

    page = Page(title='example page', slug='example-page')
    page.save()

    cell = TextCell(page=page, text='<p>foobar</p>', order=0, public=False)
    cell.save()
    cell.groups.set([group])
    TextCell(page=page, text='<p>barfoo</p>', order=0, public=False).save()
    index_site()

    request = RequestFactory().get('/')
    request.user = User.objects.create_user(username='normal-user')

    def hit_counts():
        # number of hits for 'foobar' then 'barfoo', searched in that order
        foobar_hits = search_site(request, 'foobar')
        barfoo_hits = search_site(request, 'barfoo')
        return (len(foobar_hits), len(barfoo_hits))

    # first cell is restricted, it's not found
    assert hit_counts() == (0, 1)

    page.groups.set([group])
    index_site()

    # page is restricted, no cell is found
    assert hit_counts() == (0, 0)

    # user is in group, gets a result
    request.user.groups.set([group])
    assert hit_counts() == (1, 1)

    # cell is excluded from group view
    cell.restricted_to_unlogged = True
    cell.save()
    index_site()

    assert hit_counts() == (0, 1)
|
||||
|
|
Loading…
Reference in New Issue