settings: decrease A2_DUPLICATES_THRESHOLD to 0.2 (#50445)

The original similarity threshold of 0.7 is kept for the find-duplicates web-service API.
This commit is contained in:
Benjamin Dauvergne 2021-01-22 15:36:00 +01:00
parent e306d5afbf
commit 1ff8790da0
2 changed files with 5 additions and 5 deletions

View File

@ -77,7 +77,7 @@ class UserQuerySet(models.QuerySet):
if qs.exists():
return qs
qs = self.find_duplicates(fullname=search, limit=None)
qs = self.find_duplicates(fullname=search, limit=None, threshold=0.2)
extra_user_ids = set()
attribute_values = AttributeValue.objects.filter(search_vector=SearchQuery(search), attribute__searchable=True)
extra_user_ids.update(self.filter(attribute_values__in=attribute_values).values_list('id', flat=True))
@ -92,10 +92,10 @@ class UserQuerySet(models.QuerySet):
qs = qs.order_by('dist', 'last_name', 'first_name')
return qs
def find_duplicates(self, first_name=None, last_name=None, fullname=None, birthdate=None, limit=5):
def find_duplicates(self, first_name=None, last_name=None, fullname=None, birthdate=None, limit=5, threshold=None):
with connection.cursor() as cursor:
cursor.execute(
"SET pg_trgm.similarity_threshold = %f" % app_settings.A2_DUPLICATES_THRESHOLD
"SET pg_trgm.similarity_threshold = %f" % (threshold or app_settings.A2_DUPLICATES_THRESHOLD)
)
if fullname is not None:

View File

@ -127,12 +127,12 @@ def test_fts_trigram(fts):
# dist attribute signals queryset from find_duplicates()
assert hasattr(User.objects.free_text_search('darmettein')[0], 'dist')
assert User.objects.free_text_search('lea darmettein').count() == 1
assert User.objects.free_text_search('lea darmettein').filter(dist__lte=0.3).count() == 1
assert hasattr(User.objects.free_text_search('darmettein')[0], 'dist')
def test_fts_legacy(fts):
assert User.objects.free_text_search('rue des peupliers').count() == 2
assert User.objects.free_text_search('rue des peupliers').count() == 3
def test_fts_legacy_and_trigram(fts):