search: provide title precedence over content for indexed cells (#43781)

This commit is contained in:
Paul Marillonnet 2020-06-10 10:32:33 +02:00
parent 2fd9129c86
commit 5356dbde4e
2 changed files with 40 additions and 2 deletions

View File

@ -113,9 +113,9 @@ def search_site(request, query, pages=None):
if connection.vendor == 'postgresql':
config = settings.POSTGRESQL_FTS_SEARCH_CONFIG
vector = SearchVector('title', config=config, weight='A') + SearchVector('indexed_text', config=config, weight='A')
vector = SearchVector('title', config=config, weight='A') + SearchVector('indexed_text', config=config, weight='B')
query = SearchQuery(query, config=config)
qs = IndexedCell.objects.annotate(rank=SearchRank(vector, query)).filter(rank__gte=0.3).order_by('-rank')
qs = IndexedCell.objects.annotate(rank=SearchRank(vector, query)).filter(rank__gte=0.2).order_by('-rank')
else:
qs = IndexedCell.objects.filter(
Q(indexed_text__icontains=query) | Q(title__icontains=query))

View File

@ -813,3 +813,41 @@ def test_index_site_search_engines_load(get_engines_mock, settings, app):
# no exception raised
index_site()
@pytest.mark.skipif(
    connection.vendor != 'postgresql',
    reason='only postgresql is supported')
def test_search_by_page_title(app):
    """Check that a title match ranks above a content-only match.

    Two pages are indexed: one whose cell text contains the search terms,
    and one whose title is the search terms. The page matching on its
    title must be returned first, with a strictly higher rank.
    """
    search_terms = 'nanoparticle electrochemistry'

    # Anonymous request used to run the search.
    anonymous_request = RequestFactory().get('/')
    anonymous_request.user = AnonymousUser()

    # Page whose only match is in the text of its cell.
    content_page = Page(title='example page', slug='example-page')
    content_page.save()
    content_cell = TextCell(
        page=content_page,
        placeholder='content',
        text='<p>Some nanoparticle electrochemistry content here</p>',
        order=0,
        public=True,
    )
    content_cell.save()

    # Page whose title matches the search terms; its cell text does not.
    titled_page = Page(
        title='Nanoparticle electrochemistry', slug='page-of-interest')
    titled_page.save()
    unrelated_cell = TextCell(
        page=titled_page,
        placeholder='content',
        text='<p>Some random text here</p>',
        order=0,
        public=True,
    )
    unrelated_cell.save()

    index_site()
    results = search_site(anonymous_request, search_terms)

    # Both pages are found, the title match first.
    assert len(results) == 2
    best_hit = results[0]
    runner_up = results[1]
    assert best_hit['text'] == titled_page.title
    assert best_hit['url'] == '/{}/'.format(titled_page.slug)
    assert best_hit['rank'] > runner_up['rank']