search: better queries for index_site (#40252)
This commit is contained in:
parent
a83ce2c5ee
commit
3464f6d023
|
@ -17,34 +17,54 @@
|
|||
from django.conf import settings
|
||||
from django.contrib.contenttypes.models import ContentType
|
||||
from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector
|
||||
from combo.data.models import CellBase
|
||||
from django.db import connection
|
||||
from django.db.models import Q
|
||||
from django.db.models import Q, Prefetch
|
||||
from django.db.transaction import atomic
|
||||
|
||||
from combo.data.models import Page, CellBase, ValidityInfo
|
||||
from .models import IndexedCell
|
||||
|
||||
|
||||
def set_cell_access(indexed_cell, cell):
|
||||
indexed_cell.public_access = bool(cell.page.public and cell.public)
|
||||
indexed_cell.excluded_groups.clear()
|
||||
indexed_cell.restricted_groups.clear()
|
||||
def set_cell_groups(indexed_cell, cell):
|
||||
restricted_groups = []
|
||||
excluded_groups = []
|
||||
if not indexed_cell.public_access:
|
||||
indexed_cell.restricted_groups.set(cell.groups.all())
|
||||
restricted_groups = cell.prefetched_groups
|
||||
if cell.restricted_to_unlogged:
|
||||
indexed_cell.excluded_groups.set(cell.page.groups.all())
|
||||
excluded_groups = cell.page.prefetched_groups
|
||||
else:
|
||||
for group in cell.page.groups.all():
|
||||
indexed_cell.restricted_groups.add(group)
|
||||
indexed_cell.save()
|
||||
for group in cell.page.prefetched_groups:
|
||||
restricted_groups.append(group)
|
||||
if restricted_groups:
|
||||
indexed_cell.restricted_groups.add(*restricted_groups)
|
||||
if excluded_groups:
|
||||
indexed_cell.excluded_groups.add(*excluded_groups)
|
||||
|
||||
|
||||
@atomic
|
||||
def index_site():
|
||||
cell_classes = list(CellBase.get_cell_classes())
|
||||
# populate ContentType cache
|
||||
ContentType.objects.get_for_models(*cell_classes)
|
||||
IndexedCell.objects.all().delete()
|
||||
external_urls = {}
|
||||
for klass in CellBase.get_cell_classes():
|
||||
for cell in klass.objects.filter(page__snapshot__isnull=True, page__sub_slug='').exclude(placeholder__startswith='_'):
|
||||
validity_info_list = list(ValidityInfo.objects.select_related('content_type'))
|
||||
pages_by_pk = {
|
||||
p.pk: p for p in (
|
||||
Page.objects
|
||||
.prefetch_related(Prefetch('groups', to_attr='prefetched_groups')))}
|
||||
for klass in cell_classes:
|
||||
queryset = (
|
||||
klass.objects
|
||||
.filter(page__snapshot__isnull=True, page__sub_slug='')
|
||||
.exclude(placeholder__startswith='_')
|
||||
.prefetch_related(
|
||||
Prefetch('groups', to_attr='prefetched_groups')))
|
||||
for cell in queryset:
|
||||
cell.prefetched_validity_info = [
|
||||
v for v in validity_info_list
|
||||
if v.object_id == cell.pk and v.content_type.model_class() == cell.__class__]
|
||||
cell.page = pages_by_pk.get(cell.page_id)
|
||||
cell_type = ContentType.objects.get_for_model(cell)
|
||||
indexed_cell = IndexedCell(cell_type=cell_type, cell_pk=cell.id)
|
||||
try:
|
||||
|
@ -52,29 +72,34 @@ def index_site():
|
|||
except Exception: # ignore rendering error
|
||||
continue
|
||||
if indexed_cell.indexed_text:
|
||||
indexed_cell.public_access = bool(cell.page.public and cell.public)
|
||||
indexed_cell.page_id = cell.page_id
|
||||
indexed_cell.url = cell.page.get_online_url()
|
||||
indexed_cell.title = cell.page.title
|
||||
indexed_cell.save()
|
||||
set_cell_access(indexed_cell, cell)
|
||||
set_cell_groups(indexed_cell, cell)
|
||||
|
||||
for link_data in cell.get_external_links_data():
|
||||
# index external links
|
||||
indexed_cell = external_urls.get(indexed_cell.url)
|
||||
if indexed_cell is None:
|
||||
# create an entry for that link.
|
||||
indexed_cell = IndexedCell(cell_type=cell_type, cell_pk=cell.id)
|
||||
indexed_cell = IndexedCell(
|
||||
cell_type=cell_type,
|
||||
cell_pk=cell.id,
|
||||
public_access=bool(cell.page.public and cell.public),
|
||||
url=link_data['url'],
|
||||
title=link_data['title'],
|
||||
indexed_text=link_data.get('text') or '',
|
||||
)
|
||||
indexed_cell.save()
|
||||
set_cell_access(indexed_cell, cell)
|
||||
indexed_cell.url = link_data['url']
|
||||
indexed_cell.title = link_data['title']
|
||||
indexed_cell.indexed_text = link_data.get('text') or ''
|
||||
set_cell_groups(indexed_cell, cell)
|
||||
external_urls[indexed_cell.url] = indexed_cell
|
||||
else:
|
||||
# if that link already exists, add detailed texts
|
||||
indexed_cell.indexed_text += ' ' + link_data['title']
|
||||
indexed_cell.indexed_text += ' ' + link_data.get('text') or ''
|
||||
indexed_cell.save()
|
||||
indexed_cell.save()
|
||||
|
||||
|
||||
def search_site(request, query):
|
||||
|
|
|
@ -544,4 +544,4 @@ def test_index_site_num_queries(app):
|
|||
index_site() # populate cache
|
||||
with CaptureQueriesContext(connection) as ctx:
|
||||
index_site()
|
||||
assert len(ctx.captured_queries) == 591
|
||||
assert len(ctx.captured_queries) == 195
|
||||
|
|
Loading…
Reference in New Issue