search: index external links data independently (#58269)

This commit is contained in:
Valentin Deniaud 2021-11-03 17:44:01 +01:00
parent 3dd41a03db
commit a67233d6f5
2 changed files with 40 additions and 21 deletions

View File

@ -48,7 +48,6 @@ def index_site():
# populate ContentType cache
ContentType.objects.get_for_models(*cell_classes)
IndexedCell.objects.all().delete()
external_urls = {}
validity_info_list = list(ValidityInfo.objects.select_related('content_type'))
pages_by_pk = {
p.pk: p for p in (Page.objects.prefetch_related(Prefetch('groups', to_attr='prefetched_groups')))
@ -90,26 +89,17 @@ def index_site():
for link_data in cell.get_external_links_data():
# index external links
indexed_cell = external_urls.get(link_data.get('url'))
if indexed_cell is None:
# create an entry for that link.
indexed_cell = IndexedCell(
cell_type=cell_type,
cell_pk=cell.id,
page_id=cell.page_id,
public_access=bool(cell.page.public and cell.public),
url=link_data['url'],
title=link_data['title'],
indexed_text=link_data.get('text') or '',
)
indexed_cell.save()
set_cell_groups(indexed_cell, cell)
external_urls[indexed_cell.url] = indexed_cell
else:
# if that link already exists, add detailed texts
indexed_cell.indexed_text += ' ' + link_data['title']
indexed_cell.indexed_text += ' ' + (link_data.get('text') or '')
indexed_cell.save()
indexed_cell = IndexedCell(
cell_type=cell_type,
cell_pk=cell.id,
page_id=cell.page_id,
public_access=bool(cell.page.public and cell.public),
url=link_data['url'],
title=link_data['title'],
indexed_text=link_data.get('text') or '',
)
indexed_cell.save()
set_cell_groups(indexed_cell, cell)
def search_site(request, query, pages=None, with_description=None):

View File

@ -1430,3 +1430,32 @@ def test_search_by_page_title(app):
assert hits[0]['text'] == page_of_interest.title
assert hits[0]['url'] == f'/{page_of_interest.slug}/'
assert hits[0]['rank'] > hits[1]['rank']
def test_search_same_link_multiple_pages(settings, app):
    """Check that one URL linked from two pages is indexed once per page.

    Two pages each get a ``LinkCell`` pointing at the same URL but with a
    different title.  After ``index_site()``, searching through the
    ``_text_page_one`` service must only match the title used on the first
    page, and searching through ``_text_page_two`` must only match the title
    used on the second page.
    """
    settings.KNOWN_SERVICES = {}

    # Same target URL on both pages; only the link titles differ.
    first_page = Page.objects.create(title='first page', slug='one')
    LinkCell.objects.create(
        title='foobar', url='http://example.net', page=first_page, placeholder='content', order=0
    )
    second_page = Page.objects.create(title='second page', slug='two')
    LinkCell.objects.create(
        title='barfoo', url='http://example.net', page=second_page, placeholder='content', order=0
    )

    index_site()

    search_cell = SearchCell.objects.create(
        page=first_page, placeholder='content', _search_services={'data': ['_text_page_one']}, order=1
    )

    def count_hits(url_template):
        # Query the ajax search endpoint for this cell and count the number
        # of '<li' occurrences in the response (one per rendered hit).
        response = app.get(url_template % search_cell.pk, status=200)
        return response.text.count('<li')

    # NOTE(review): '_text_page_one' presumably restricts the search to the
    # page whose slug is 'one' -- confirm against the search engines code.
    assert count_hits('/ajax/search/%s/_text_page_one/?q=foobar') == 1
    assert count_hits('/ajax/search/%s/_text_page_one/?q=barfoo') == 0

    # Point the same cell at the second page's service and check the mirror
    # image of the results above.
    search_cell._search_services = {'data': ['_text_page_two']}
    search_cell.save()

    assert count_hits('/ajax/search/%s/_text_page_two/?q=foobar') == 0
    assert count_hits('/ajax/search/%s/_text_page_two/?q=barfoo') == 1