combo/tests/test_search.py

513 lines
19 KiB
Python

import json
import os
import pytest
import re
import shutil
import mock
from django.conf import settings
from django.contrib.auth.models import AnonymousUser, User, Group
from django.test import override_settings
from django.test.client import RequestFactory
from django.core.management import call_command
from django.core.urlresolvers import reverse
from combo.apps.search.engines import engines
from combo.apps.search.models import SearchCell, IndexedCell
from combo.apps.search.utils import index_site, search_site
from combo.data.models import Page, JsonCell, TextCell, MenuCell, LinkCell
from .test_manager import login
# Apply the pytest-django ``django_db`` marker to every test of this module.
pytestmark = pytest.mark.django_db
# Search engine definitions installed as COMBO_SEARCH_SERVICES by the tests.
SEARCH_SERVICES = {
    # plain engine
    'search1': dict(
        label='Search 1',
        url='http://www.example.net/search/?q=%(q)s',
    ),
    # engine whose URL carries a [search_url] template variable
    'search_tmpl': dict(
        label='Search with template',
        url='[search_url]?q=%(q)s',
    ),
    # engine declaring a 'data_key' of 'results'
    'search_alternate_key': dict(
        label='Search with alternate key',
        url='http://www.example.net/search/?q=%(q)s',
        data_key='results',
    ),
}

# Value given to the TEMPLATE_VARS setting when testing 'search_tmpl'.
TEMPLATE_VARS = dict(search_url='http://search.example.net/')
class SearchServices(object):
    """Context manager installing search services in the Django settings.

    On entry ``settings.COMBO_SEARCH_SERVICES`` is replaced by
    ``search_services``; on exit the value that was in place before entering
    is put back, so a pre-existing configuration is not lost.
    """

    def __init__(self, search_services):
        self.search_services = search_services
        # value to restore on exit; refreshed every time the context is entered
        self._previous = {}

    def __enter__(self):
        # Fall back to an empty dict when the setting is not defined at all.
        self._previous = getattr(settings, 'COMBO_SEARCH_SERVICES', {})
        settings.COMBO_SEARCH_SERVICES = self.search_services
        return self

    def __exit__(self, *args, **kwargs):
        # Returns None: exceptions raised inside the block are not swallowed.
        settings.COMBO_SEARCH_SERVICES = self._previous
def test_search_cell(app):
    """Check SearchCell rendering and its ajax endpoint with mocked engines."""
    patched_get = 'combo.apps.search.models.requests.get'

    with SearchServices(SEARCH_SERVICES):
        page = Page(title='Search', slug='search_page', template_name='standard')
        page.save()

        cell = SearchCell(page=page, placeholder='content', order=0)
        cell._search_services = {'data': ['search1']}
        cell.save()

        def ajax_search(engine, query):
            # the query is inserted verbatim, callers pass it already encoded
            return app.get('/ajax/search/%s/%s/?q=%s' % (cell.pk, engine, query), status=200)

        # basic rendering: an input tied to the cell, without autofocus
        html = cell.render({})
        assert 'input' in html
        assert ('id="combo-search-input-%s"' % cell.pk) in html
        assert 'autofocus' not in html

        cell.autofocus = True
        cell.save()
        assert 'autofocus' in cell.render({})

        # the q_var_name query parameter ends up in the rendered markup
        cell.slug = 'var-name'
        request = RequestFactory().get('/?q_var_name=searchme')
        html = cell.render({'request': request})
        assert "$input.val('searchme');" in html

        with mock.patch(patched_get) as requests_get:
            payload = {'err': 0, 'data': []}
            remote = mock.Mock()
            remote.json.return_value = payload
            requests_get.return_value = remote

            # no hit
            resp = ajax_search('search1', 'foo')
            assert requests_get.call_args[0][0] == 'http://www.example.net/search/?q=foo'
            assert '<li>' not in resp.text
            assert 'no result found' in resp.text

            resp = ajax_search('search1', 'foo%23bar')
            assert requests_get.call_args[0][0] == 'http://www.example.net/search/?q=foo%23bar'
            assert '<li>' not in resp.text
            assert 'no result found' in resp.text

            # one hit (payload is the very dict returned by the mock)
            payload['data'] = [{'url': 'http://test', 'text': 'barbarbar'}]
            resp = ajax_search('search1', 'foo')
            assert resp.text.count('<li>') == 1
            assert '<li><a href="http://test">barbarbar</a>' in resp.text
            assert 'no result found' not in resp.text

            # one hit with an HTML description
            payload['data'] = [{
                'url': 'http://test',
                'text': 'barbarbar',
                'description': 'this is <b>html</b>',
            }]
            resp = ajax_search('search1', 'foo')
            assert resp.text.count('<li>') == 1
            assert '<li><a href="http://test">barbarbar</a>' in resp.text
            assert 'this is <b>html</b>' in resp.text
            assert 'no result found' not in resp.text

            # empty query
            resp = ajax_search('search1', '')
            assert '<li>' not in resp.text
            assert 'no result found' not in resp.text

            # engine with results stored under an alternate key
            cell._search_services = {'data': ['search_alternate_key']}
            cell.save()
            payload = {'results': [{'url': 'http://test', 'text': 'barbarbar'}]}
            remote.json.return_value = payload
            resp = ajax_search('search_alternate_key', 'foo')
            assert resp.text.count('<li>') == 1
            assert '<li><a href="http://test">barbarbar</a>' in resp.text

            # search engine does not return valid JSON
            class FakedResponse(mock.Mock):
                def json(self):
                    return json.loads(self.content)

            requests_get.return_value = FakedResponse(content='notjson', status_code=200)
            resp = ajax_search('search_alternate_key', 'bar')
            assert requests_get.call_args[0][0] == 'http://www.example.net/search/?q=bar'
            assert '<li>' not in resp.text
            assert 'no result found' in resp.text

            requests_get.return_value = FakedResponse(content='500withbadjson', status_code=500)
            resp = ajax_search('search_alternate_key', 'foo')
            assert requests_get.call_args[0][0] == 'http://www.example.net/search/?q=foo'
            assert '<li>' not in resp.text
            assert 'no result found' in resp.text

        with override_settings(TEMPLATE_VARS=TEMPLATE_VARS):
            cell._search_services = {'data': ['search_tmpl']}
            cell.save()

            with mock.patch(patched_get) as requests_get:
                payload = {'err': 0, 'data': []}
                remote = mock.Mock()
                remote.json.return_value = payload
                requests_get.return_value = remote

                resp = ajax_search('search_tmpl', 'foo')
                assert requests_get.call_args[0][0] == 'http://search.example.net/?q=foo'

                # TEMPLATE_VARS are accessible in template
                cell.slug = 'searchfoo'
                cell.save()
                here = os.path.abspath(os.path.dirname(__file__))
                engine_conf = settings.TEMPLATES[0].copy()
                engine_conf['DIRS'] = ['%s/templates-1' % here]
                with override_settings(TEMPLATES=[engine_conf]):
                    resp = ajax_search('search_tmpl', 'bar')
                    assert requests_get.call_args[0][0] == 'http://search.example.net/?q=bar'
                    assert 'searchfoo results.data=[]' in resp.text
                    assert 'search_url=http://search.example.net/' in resp.text
def test_search_global_context(app):
    """Check a search cell variable is substituted in a JsonCell URL."""
    with SearchServices(SEARCH_SERVICES):
        page = Page(title='Search', slug='search_page', template_name='standard')
        page.save()

        search_cell = SearchCell(page=page, placeholder='content', order=0)
        search_cell._search_services = {'data': ['search1']}
        search_cell.save()
        # no slug, no variable name
        assert search_cell.varname == ''

        search_cell.slug = 'search-item'
        search_cell.save()
        assert search_cell.varname == 'search_item'

        jsoncell = JsonCell(page=page, placeholder='content', order=0)
        jsoncell.url = 'http://www.example.net/search/[search_item]/'
        jsoncell.save()

        cell_url = reverse(
            'combo-public-ajax-page-cell',
            kwargs={'page_pk': page.id, 'cell_reference': jsoncell.get_reference()},
        )
        cell_url += '?search_item=foo'

        with mock.patch('combo.utils.requests.get') as requests_get:
            data = {'data': []}
            requests_get.return_value = mock.Mock(json=lambda: data, status_code=200)
            app.get(cell_url)
            assert requests_get.call_args[0][0] == 'http://www.example.net/search/foo/'
def test_search_custom_templates(app):
    """Check hit url/label/description templates are applied to results."""
    search2 = {
        'label': 'Search 2',
        'url': 'http://www.example.net/search/?q=%(q)s',
        'hit_url_template': 'http://example.net/{{id}}/',
        'hit_label_template': '{{a}} {{b}}',
        'hit_description_template': 'description {{a}}',
    }

    with SearchServices({'search2': search2}):
        page = Page(title='Search', slug='search_page', template_name='standard')
        page.save()

        cell = SearchCell(page=page, placeholder='content', order=0)
        cell._search_services = {'data': ['search2']}
        cell.save()

        with mock.patch('combo.apps.search.models.requests.get') as requests_get:
            remote = mock.Mock()
            remote.json.return_value = {
                'err': 0,
                'data': [{'id': '123', 'a': 'A', 'b': 'B'}],
            }
            requests_get.return_value = remote

            body = app.get('/ajax/search/%s/search2/?q=foo' % cell.pk, status=200).text
            assert body.count('<li>') == 1
            assert '<li><a href="http://example.net/123/">A B</a>' in body
            assert '<div>description A</div>' in body
def test_search_cell_visibility(app):
    """Check a search cell gets visible once an engine is selected."""
    page = Page(title='example page', slug='example-page')
    page.save()

    with SearchServices(SEARCH_SERVICES):
        cell = SearchCell(page=page, order=0)
        visible_without_engine = cell.is_visible()
        assert not visible_without_engine

        cell._search_services = {'data': ['_text']}
        visible_with_engine = cell.is_visible()
        assert visible_with_engine
def test_search_contents():
    """Check what render_for_search() gives for several kinds of cells."""
    page = Page(title='example page', slug='example-page')
    page.save()

    # private cells are indexed
    private_cell = TextCell(page=page, text='foobar', public=False, order=0)
    assert private_cell.render_for_search().strip() == 'foobar'

    # no indexation of empty cells (is_relevant check)
    empty_cell = TextCell(page=page, text='', order=0)
    assert empty_cell.render_for_search() == ''

    # indexation: the markup is not part of the searchable text
    html_cell = TextCell(page=page, text='<p>foobar</p>', order=0)
    assert html_cell.render_for_search().strip() == 'foobar'

    # no indexation of menu cells
    menu_cell = MenuCell(page=page, order=0)
    assert menu_cell.render_for_search() == ''
def test_search_contents_index():
    """Check a text cell is only found once the site has been indexed."""
    page = Page(title='example page', slug='example-page')
    page.public = True
    page.save()
    TextCell(page=page, text='<p>foobar</p>', order=0).save()

    request = RequestFactory().get('/')
    request.user = AnonymousUser()

    def hit_count(query):
        return len(search_site(request, query))

    # nothing indexed yet
    assert hit_count('foobar') == 0

    index_site()
    assert hit_count('foobar') == 1
def test_search_contents_technical_placeholder():
    """Check the cell in the '_off' placeholder is not found, unlike the 'on' one."""
    page = Page(title='example page', slug='example-page')
    page.save()

    for text, placeholder in (('<p>foobar</p>', '_off'), ('<p>barfoo</p>', 'on')):
        TextCell(page=page, text=text, order=0, placeholder=placeholder).save()

    request = RequestFactory().get('/')
    request.user = AnonymousUser()
    index_site()

    off_hits = search_site(request, 'foobar')
    assert len(off_hits) == 0
    on_hits = search_site(request, 'barfoo')
    assert len(on_hits) == 1
def test_search_api(app):
    """Check the ajax endpoint of the '_text' engine over two indexed pages."""
    first_page = Page(title='example page', slug='example-page')
    first_page.save()
    TextCell(page=first_page, text='<p>foobar baz</p>', order=0).save()

    second_page = Page(title='second page', slug='second-page')
    second_page.save()
    TextCell(page=second_page, text='<p>other baz</p>', order=0).save()

    index_site()

    search_cell = SearchCell(page=first_page, _search_services={'data': ['_text']}, order=0)
    search_cell.save()

    def text_search(query):
        url = '/ajax/search/%s/_text/?q=%s' % (search_cell.id, query)
        return app.get(url, status=200).text

    # word only present on the first page
    body = text_search('foobar')
    assert body.count('<li') == 1
    assert 'example page' in body

    # word only present on the second page
    body = text_search('other')
    assert body.count('<li') == 1
    assert 'second page' in body

    # word present on both pages
    assert text_search('baz').count('<li') == 2

    # unknown word
    assert text_search('quux').count('<li') == 0
def test_search_external_links(app):
    """Check link cells are searchable and yield one hit per target URL."""
    target = 'http://example.net'
    page = Page(title='example page', slug='example-page')
    page.save()
    SearchCell(page=page, _search_services={'data': ['_text']}, order=0).save()
    index_site()

    request = RequestFactory().get('/')
    request.user = AnonymousUser()

    # no link yet
    hits = search_site(request, 'foobar')
    assert len(hits) == 0

    LinkCell(title='foobar', url='http://example.net', page=page, order=0).save()
    index_site()
    hits = search_site(request, 'foobar')
    assert len(hits) == 1
    assert hits[0]['text'] == 'foobar'
    assert hits[0]['url'] == target

    # second link with same target
    LinkCell(title='baz', url='http://example.net', page=page, order=0).save()
    index_site()

    # each title gives a single hit, labelled with either title
    for query in ('baz', 'foobar'):
        hits = search_site(request, query)
        assert len(hits) == 1
        assert hits[0]['text'] in ('foobar', 'baz')
        assert hits[0]['url'] == target
def test_manager_search_cell(app, admin_user):
    """Check adding a search cell in the manager and selecting its engines."""
    Page.objects.all().delete()
    page = Page(title='One', slug='one', template_name='standard')
    page.save()
    page_url = '/manage/pages/%s/' % page.id

    app = login(app)
    resp = app.get(page_url)
    add_option = resp.html.find('option', **{'data-add-url': re.compile('search_searchcell')})
    app.get(add_option['data-add-url'])

    cells = Page.objects.get(id=page.id).get_cells()
    assert len(cells) == 1
    assert isinstance(cells[0], SearchCell)

    cell_reference = cells[0].get_reference()
    field_name = 'c%s-_search_services' % cell_reference

    with override_settings(KNOWN_SERVICES={}):
        resp = app.get(page_url)
        assert ('data-cell-reference="%s"' % cell_reference) in resp.text
        assert len(resp.form[field_name].options) == 1

        with SearchServices(SEARCH_SERVICES):
            resp = app.get(page_url)
            assert len(resp.form[field_name].options) == 4

            # simulate reordering of options
            resp.form[field_name].options = [
                (u'search_tmpl', False, u'Search with template'),
                (u'search_alternate_key', False, u'Search with alternate key'),
                (u'_text', False, u'Page Contents'),
                (u'search1', False, u'Search 1'),
            ]
            resp.form[field_name].value = ['search_tmpl', '_text']
            resp = resp.form.submit()
            assert resp.status_int == 302

            # check selected engines are selected and the first items of the list
            resp = app.get(page_url)
            services_field = resp.form[field_name]
            assert set(services_field.value) == {'search_tmpl', '_text'}
            assert services_field.options[0][0] == 'search_tmpl'
            assert services_field.options[1][0] == '_text'

        # check there's no crash if search engines are removed from config
        resp = app.get(page_url)
        assert resp.form[field_name].value == ['_text']
def test_manager_waiting_index_message(app, admin_user):
    """Check the indexing notice is displayed until index_site() has run."""
    notice = 'Content indexing has been scheduled'

    Page.objects.all().delete()
    page = Page(title='One', slug='one', template_name='standard')
    page.save()
    page_url = '/manage/pages/%s/' % page.id

    app = login(app)
    resp = app.get(page_url)
    add_option = resp.html.find('option', **{'data-add-url': re.compile('search_searchcell')})
    resp = app.get(add_option['data-add-url']).follow()
    assert notice not in resp.text

    # selecting the '_text' engine brings up the notice
    cells = Page.objects.get(id=page.id).get_cells()
    field_name = 'c%s-_search_services' % cells[0].get_reference()
    resp.form[field_name] = ['_text']
    resp = resp.form.submit().follow()
    assert notice in resp.text

    # and it is gone after indexing
    index_site()
    resp = app.get(page_url)
    assert notice not in resp.text
def test_manager_add_search_cell(app, admin_user):
    """Smoke test: follow the manager "add search cell" URL for a fresh page.

    This function used to be named ``test_manager_search_cell``, a name
    already defined earlier in the module; being defined last it replaced the
    earlier test, which was therefore never collected. It carries its own
    name so both tests run.
    """
    Page.objects.all().delete()
    page = Page(title='One', slug='one', template_name='standard')
    page.save()

    app = login(app)
    resp = app.get('/manage/pages/%s/' % page.id)
    # the <option> whose data-add-url matches "search_searchcell" adds the cell
    add_option = resp.html.find('option', **{'data-add-url': re.compile('search_searchcell')})
    resp = app.get(add_option['data-add-url'])
def test_wcs_search_engines(app):
    """Check w.c.s. related engines against the KNOWN_SERVICES setting."""
    with override_settings(KNOWN_SERVICES={}):
        engine_keys = engines.get_engines().keys()
        assert 'tracking-code' not in engine_keys
        formdata_keys = [key for key in engine_keys if key.startswith('formdata:')]
        assert len(formdata_keys) == 0

    # NOTE(review): the original indentation is lost; this loop is assumed to
    # run outside the override above (with the default settings) - confirm.
    for key, engine in engines.get_engines().items():
        if not key.startswith('formdata:'):
            continue
        assert '&include-anonymised=off' in engine['url']
def test_profile_search_engines(app):
    """Check when the 'users' engine is listed by get_engines()."""
    # not listed with the default settings
    assert 'users' not in engines.get_engines().keys()

    authentic_services = {
        'authentic': {'default': {'title': 'authentic', 'url': 'https://authentic/'}},
    }
    with override_settings(KNOWN_SERVICES=authentic_services):
        # still not listed with only the authentic service declared
        assert 'users' not in engines.get_engines().keys()

        users_page = Page(slug='users', title='Users', sub_slug='(?P<name_id>[a-z0-9]+)')
        users_page.save()

        # listed once the 'users' page exists
        assert 'users' in engines.get_engines().keys()
def test_private_search(app):
    """Check non-public cells are only found by a logged-in user."""
    page = Page(title='example page', slug='example-page')
    page.save()
    TextCell(page=page, text='<p>foobar</p>', order=0, public=False).save()
    TextCell(page=page, text='<p>barfoo</p>', order=0, public=True).save()

    request = RequestFactory().get('/')
    request.user = AnonymousUser()
    index_site()

    def hit_count(query):
        # request.user is looked up at call time
        return len(search_site(request, query))

    # anonymous user: only the public cell
    assert hit_count('foobar') == 0
    assert hit_count('barfoo') == 1

    # logged-in user: both cells
    request.user = User.objects.create_user(username='normal-user')
    assert hit_count('foobar') == 1
    assert hit_count('barfoo') == 1
def test_restricted_search(app):
    """Check search hits follow cell and page group restrictions."""
    group = Group(name='plop')
    group.save()

    page = Page(title='example page', slug='example-page')
    page.save()

    restricted_cell = TextCell(page=page, text='<p>foobar</p>', order=0, public=False)
    restricted_cell.save()
    restricted_cell.groups.set([group])
    TextCell(page=page, text='<p>barfoo</p>', order=0, public=False).save()
    index_site()

    request = RequestFactory().get('/')
    request.user = User.objects.create_user(username='normal-user')

    def hit_count(query):
        return len(search_site(request, query))

    # first cell is restricted, it's not found
    assert hit_count('foobar') == 0
    assert hit_count('barfoo') == 1

    # page is restricted, no cell is found
    page.groups.set([group])
    index_site()
    assert hit_count('foobar') == 0
    assert hit_count('barfoo') == 0

    # user is in group, gets a result
    request.user.groups.set([group])
    assert hit_count('foobar') == 1
    assert hit_count('barfoo') == 1

    # cell is excluded from group view
    restricted_cell.restricted_to_unlogged = True
    restricted_cell.save()
    index_site()
    assert hit_count('foobar') == 0
    assert hit_count('barfoo') == 1