Merge branch 'master' into nanterre-recette

This commit is contained in:
Thomas NOËL 2018-04-05 09:47:16 +02:00
commit 33ccd0a34f
4 changed files with 16 additions and 9 deletions

View File

@@ -167,8 +167,8 @@ class SearchView(APIView):
try:
threshold = float(request.GET.get('threshold', ''))
except ValueError:
threshold = 0.2
search = utils.PersonSearch(limit=threshold)
threshold = getattr(settings, 'ZOO_NANTERRE_SEARCH_THRESHOLD', 0.13)
search = utils.PersonSearch(limit=threshold, base_limit=threshold)
if 'q' in request.GET:
search = search.search_query(request.GET['q'])
else:

View File

@@ -20,6 +20,7 @@ import datetime
from django.db.transaction import atomic
from django.db.models.query import Q
from django.utils.timezone import now
from django.conf import settings
from zoo.utils import strip_accents
from zoo.zoo_data.models import Entity
@@ -31,9 +32,8 @@ from .utils import pair_sort, PersonSearch
def find_duplicates(limit=None, base_limit=None, queryset=None, days=None, count=None, ids=None,
progression=False):
# Define search space
limit = limit or 0.75
limit = limit or getattr(settings, 'ZOO_NANTERRE_DUPLICATES_THRESHOLD', 0.7)
base_limit = base_limit or limit / 2.0
'''Search for duplicate entities based on a list of field paths'''
qs = queryset or Entity.objects.all()
qs = qs.filter(schema__slug='individu')
if days:

View File

@@ -112,7 +112,7 @@ class Command(BaseCommand):
elif command == 'delete':
qs = Duplicate.objects.all()
if limit:
qs = qs.filter(limit__lt=limit)
qs = qs.filter(score__lt=limit)
qs.delete()
elif command == 'list':
qs = Duplicate.objects.order_by('-created', '-id')

View File

@@ -36,7 +36,7 @@ import psycopg2
from django.conf import settings
from django.contrib.postgres.search import TrigramDistance
from django.db import connection
from django.db.models import Q, F, Value, ExpressionWrapper, CharField
from django.db.models import Q, F, Value, ExpressionWrapper, CharField, When, Case
from django.db.models.functions import Least, Greatest, Coalesce, Concat
from django.db import transaction
from django.utils.timezone import now, make_aware
@@ -340,9 +340,16 @@ class PersonSearch(object):
self.add_filter('name', q)
# Compute similarity score
for expression in (fullname_naissance, fullname_usage):
self.name_similarities.append(
Value(1.0) - TrigramDistance(expression, self.luv(fullname)))
self.name_similarities.append(
Value(1.0) - Case(
When(content__nom_de_naissance='',
then=Value(1.0)),
default=TrigramDistance(fullname_naissance, self.luv(fullname))))
self.name_similarities.append(
Value(1.0) - Case(
When(content__nom_d_usage='',
then=Value(1.0)),
default=TrigramDistance(fullname_usage, self.luv(fullname))))
return self
def search_names(self, names):