Add cleanup for deleted object entries in buildwatson

This commit is contained in:
Cristopher Hernandez 2021-10-24 16:24:22 -07:00 committed by Guillaume Baffoin
parent 6fbe3b0229
commit 21a8a498b3
3 changed files with 42 additions and 0 deletions

View File

@ -24,6 +24,7 @@ from django.contrib.auth.models import User
from django import template
from django.utils.encoding import force_str
from django.db.models import Case, When, Value, IntegerField
from django.db import connection
from watson import search as watson
from watson.models import SearchEntry
@ -186,6 +187,19 @@ class InternalsTest(SearchTestBase):
self.assertEqual(watson.search("fooo1").count(), 1)
self.assertEqual(watson.search("fooo2").count(), 1)
self.assertEqual(watson.search("fooo3").count(), 1)
# Use raw deletion query to remove record directly from the database (no signals triggered).
# This is so that the cleanup functionality of buildwatson can be tested
with connection.cursor() as cursor:
cursor.execute(
'DELETE FROM ' + WatsonTestModel1._meta.db_table + ' WHERE ' + WatsonTestModel1._meta.pk.name + ' = %s',
[self.test11.id]
)
# Run the rebuild command again.
call_command("buildwatson", verbosity=0)
# Test that the deleted object is now gone, but the other objects can still be found.
self.assertEqual(watson.search("fooo1").count(), 0)
self.assertEqual(watson.search("fooo2").count(), 1)
self.assertEqual(watson.search("fooo3").count(), 1)
def testUpdateSearchIndex(self):
# Update a model and make sure that the search results match.

View File

@ -66,9 +66,11 @@ def rebuild_index_for_model(model_, engine_slug_, verbosity_, slim_=False, batch
)
)
if non_atomic_:
search_engine_.cleanup_model_index(model_)
_bulk_save_search_entries(iter_search_entries(), batch_size=batch_size_)
else:
with transaction.atomic():
search_engine_.cleanup_model_index(model_)
_bulk_save_search_entries(iter_search_entries(), batch_size=batch_size_)
return local_refreshed_model_count[0]

View File

@ -15,6 +15,7 @@ from django.core.exceptions import ImproperlyConfigured, ObjectDoesNotExist
from django.db import models, connections, router
from django.db.models import Q
from django.db.models.expressions import RawSQL
from django.db.models.functions import Coalesce
from django.db.models.query import QuerySet
from django.db.models.signals import post_save, pre_delete
from django.utils.encoding import force_str
@ -442,6 +443,26 @@ class SearchEngine(object):
model=model,
))
def _get_deleted_entries_for_model(self, model):
    """Return the search entries of ``model`` whose source rows no longer exist.

    Builds a ``SearchEntry`` queryset restricted to this engine's slug and to
    the content type of ``model``, keeping only entries whose stored object id
    is absent from the primary keys currently returned by the model's default
    manager. The queryset is returned unevaluated; nothing is deleted here.
    """
    from django.contrib.contenttypes.models import ContentType
    from watson.models import SearchEntry, has_int_pk
    content_type = ContentType.objects.get_for_model(model)
    # Primary keys of every object that still exists. ``_default_manager`` is
    # used instead of ``objects`` because this code handles arbitrary
    # registered models, and a model that declares its managers under other
    # names has no ``objects`` attribute at all.
    existing_pks = models.Subquery(
        model._default_manager.all().values('pk')
    )
    # Compare against whichever id column the entry uses for this model:
    # ``object_id_int`` when ``has_int_pk`` says so, ``object_id`` otherwise.
    # NOTE(review): presumably ``has_int_pk`` reports an integer primary key;
    # confirm against watson.models.
    if has_int_pk(model):
        id_lookup = {'object_id_int__in': existing_pks}
    else:
        id_lookup = {'object_id__in': existing_pks}
    # The negated lookup keeps only entries with no matching live object.
    return SearchEntry.objects.filter(
        Q(content_type=content_type) &
        Q(engine_slug=self._engine_slug) &
        ~Q(**id_lookup)
    )
def _get_entries_for_obj(self, obj):
"""Returns a queryset of entries associate with the given obj."""
from django.contrib.contenttypes.models import ContentType
@ -501,6 +522,11 @@ class SearchEngine(object):
# Oh no! Somehow we've got duplicated search entries!
search_entries.exclude(id=search_entries[0].id).delete()
def cleanup_model_index(self, model):
    """Delete the index entries of ``model`` that point at missing objects.

    The stale entries are looked up through
    ``_get_deleted_entries_for_model`` and removed with a single
    ``delete()`` call on the result. Nothing is returned.
    """
    self._get_deleted_entries_for_model(model).delete()
def update_obj_index(self, obj):
"""Updates the search index for the given obj."""
_bulk_save_search_entries(list(self._update_obj_index_iter(obj)))