custom_user: use strict_word_similarity in find_duplicates (#80940)
gitea/authentic/pipeline/head This commit looks good
Details
gitea/authentic/pipeline/head This commit looks good
Details
This commit is contained in:
parent
1f980ae638
commit
7935236510
|
@ -16,6 +16,9 @@
|
|||
|
||||
from django.apps import AppConfig
|
||||
from django.db import DEFAULT_DB_ALIAS, router
|
||||
from django.db.models import CharField, TextField
|
||||
|
||||
from .postgres_utils import TrigramStrictWordSimilar
|
||||
|
||||
|
||||
class CustomUserConfig(AppConfig):
|
||||
|
@ -27,6 +30,10 @@ class CustomUserConfig(AppConfig):
|
|||
|
||||
post_migrate.connect(self.create_first_name_last_name_attributes, sender=self)
|
||||
|
||||
# register custom postgres ORM lookup
|
||||
CharField.register_lookup(TrigramStrictWordSimilar)
|
||||
TextField.register_lookup(TrigramStrictWordSimilar)
|
||||
|
||||
def create_first_name_last_name_attributes(
|
||||
self, app_config, verbosity=2, interactive=True, using=DEFAULT_DB_ALIAS, **kwargs
|
||||
):
|
||||
|
|
|
@ -34,6 +34,8 @@ from authentic2.models import AttributeValue
|
|||
from authentic2.utils.date import parse_date
|
||||
from authentic2.utils.lookups import ImmutableConcat, Unaccent
|
||||
|
||||
from .postgres_utils import TrigramStrictWordDistance
|
||||
|
||||
|
||||
class UserQuerySet(models.QuerySet):
|
||||
def free_text_search(self, search):
|
||||
|
@ -131,7 +133,9 @@ class UserQuerySet(models.QuerySet):
|
|||
def find_duplicates(
|
||||
self, first_name=None, last_name=None, fullname=None, birthdate=None, limit=5, threshold=None, ou=None
|
||||
):
|
||||
self.set_trigram_similarity_threshold(threshold=threshold or app_settings.A2_DUPLICATES_THRESHOLD)
|
||||
self.set_trigram_strict_word_similarity_threshold(
|
||||
threshold=threshold or app_settings.A2_DUPLICATES_THRESHOLD
|
||||
)
|
||||
|
||||
if fullname is not None:
|
||||
name = fullname
|
||||
|
@ -141,8 +145,8 @@ class UserQuerySet(models.QuerySet):
|
|||
name = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode('ascii').lower()
|
||||
|
||||
qs = self.annotate(name=Lower(Unaccent(ImmutableConcat('first_name', Value(' '), 'last_name'))))
|
||||
qs = qs.filter(name__trigram_similar=name)
|
||||
qs = qs.annotate(dist=TrigramDistance('name', name))
|
||||
qs = qs.filter(name__trigram_strict_word_similar=name)
|
||||
qs = qs.annotate(dist=TrigramStrictWordDistance('name', name))
|
||||
qs = qs.order_by('dist')
|
||||
|
||||
if ou:
|
||||
|
@ -169,6 +173,13 @@ class UserQuerySet(models.QuerySet):
|
|||
|
||||
return qs
|
||||
|
||||
def set_trigram_strict_word_similarity_threshold(self, threshold=None):
    """Set ``pg_trgm.strict_word_similarity_threshold`` on the database connection.

    :param threshold: value to apply; any falsy value (``None``, ``0``) falls
        back to ``app_settings.A2_FTS_THRESHOLD``.
    """
    with connection.cursor() as cur:
        # '%f' renders the value as a plain decimal number, so only a numeric
        # literal can end up in the SET statement.
        effective = threshold or app_settings.A2_FTS_THRESHOLD
        cur.execute('SET pg_trgm.strict_word_similarity_threshold = %f' % effective)
|
||||
|
||||
def set_trigram_similarity_threshold(self, threshold=None):
|
||||
with connection.cursor() as cursor:
|
||||
cursor.execute(
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
# authentic2 - (C) Entr'ouvert
|
||||
|
||||
from django.contrib.postgres.search import TrigramBase
|
||||
from django.db.models.lookups import PostgresOperatorLookup
|
||||
|
||||
|
||||
class TrigramStrictWordSimilar(PostgresOperatorLookup):
    """ORM lookup ``<field>__trigram_strict_word_similar=<value>``.

    Compiles to the PostgreSQL ``%>>`` operator between the field and the
    given value.
    """

    # Name used after the double underscore in queryset filters.
    lookup_name = 'trigram_strict_word_similar'

    # The percent sign is written doubled; presumably because the generated
    # SQL later goes through %-style parameter substitution -- TODO confirm.
    postgres_operator = '%%>>'
|
||||
|
||||
|
||||
class TrigramStrictWordDistance(TrigramBase):
    """Expression rendering ``<expression> <->>> <string>``.

    Intended for annotating a queryset with a distance value that can be
    ordered or filtered on.
    """

    # No wrapping SQL function name: only the infix operator is emitted.
    function = ''

    # Infix operator placed between the two arguments.
    arg_joiner = ' <->>> '
|
|
@ -163,10 +163,44 @@ def test_fts_trigram(fts):
|
|||
# dist attribute signals queryset from find_duplicates()
|
||||
assert hasattr(User.objects.free_text_search('darmettein')[0], 'dist')
|
||||
|
||||
assert User.objects.free_text_search('lea darmettein').filter(dist__lte=0.3).count() == 1
|
||||
assert list(
|
||||
User.objects.free_text_search('lea darmettein')
|
||||
.filter(dist=0.0)
|
||||
.values_list('last_name', 'first_name')
|
||||
) == [('darmettein', 'Lea')]
|
||||
assert hasattr(User.objects.free_text_search('darmettein')[0], 'dist')
|
||||
|
||||
|
||||
def test_fts_last_name(db):
    """Free-text search on a last name returns every user bearing it at distance 0.

    Eight ``ROSSET`` users are created, plus one ``RUSSO`` user whose first
    name (``Rossetta``) resembles the search term; only the ``ROSSET`` users
    are expected below the 0.2 distance cut-off.
    """
    # Creation order is kept exactly as listed.
    for given_name in (
        'Albert',
        'Michel',
        'Nicole',
        'Sylviane',
        'Jean-Pierre',
        'JEAN PIERRE',
        'Jean-Claude',
        'Jeanine',
    ):
        User.objects.create(last_name='ROSSET', first_name=given_name)

    # Near miss on the first name only; absent from the expected rows below.
    User.objects.create(last_name='RUSSO', first_name='Rossetta')

    close_matches = (
        User.objects.free_text_search('rosset')
        .filter(dist__lt=0.2)
        .values_list('last_name', 'first_name', 'dist')
    )
    expected_rows = [
        ('ROSSET', 'Albert', 0.0),
        ('ROSSET', 'Jean-Claude', 0.0),
        ('ROSSET', 'Jeanine', 0.0),
        ('ROSSET', 'Jean-Pierre', 0.0),
        ('ROSSET', 'JEAN PIERRE', 0.0),
        ('ROSSET', 'Michel', 0.0),
        ('ROSSET', 'Nicole', 0.0),
        ('ROSSET', 'Sylviane', 0.0),
    ]
    assert list(close_matches) == expected_rows
|
||||
|
||||
|
||||
def test_fts_legacy(fts):
|
||||
assert User.objects.free_text_search('rue des peupliers').count() == 3
|
||||
|
||||
|
|
Loading…
Reference in New Issue