nanterre: ajoute une méthode de recherche spécifique pour les doublons (fixes #22330)
This commit is contained in:
parent
ef8a4565d6
commit
774610685f
|
@ -1,5 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import datetime
|
||||
import csv
|
||||
|
||||
import pytest
|
||||
import django_webtest
|
||||
|
@ -281,3 +282,31 @@ def app(request):
|
|||
wtm._patch_settings()
|
||||
request.addfinalizer(wtm._unpatch_settings)
|
||||
return django_webtest.DjangoTestApp(extra_environ={'HTTP_HOST': 'localhost'})
|
||||
|
||||
|
||||
@pytest.fixture
def lot_of_names(rsu_schema):
    """Populate the RSU schema with individuals from a private CSV export.

    Each row of tests/export-etat-civil.csv is expected to hold:
    genre, statut_legal, prenoms, nom_de_naissance, nom_d_usage,
    date_de_naissance.  The file contains personal data and is not shipped
    publicly, so the test is skipped when it is absent.
    """
    try:
        # open() is kept separate from the reader so that a missing file
        # skips the test instead of failing it.
        csv_file = open('tests/export-etat-civil.csv')
    except IOError:
        pytest.skip()

    # Fix: the original leaked the file handle; close it deterministically
    # once bulk_create() has fully consumed the generator.
    with csv_file:
        reader = csv.reader(csv_file)

        def generate():
            # All created entities share a single creation transaction.
            tr = Transaction.objects.create()
            for genre, statut_legal, prenoms, nom_de_naissance, nom_d_usage, date_de_naissance in reader:
                yield Entity(
                    schema=rsu_schema['individu'],
                    created=tr,
                    content={
                        'genre': genre,
                        'prenoms': prenoms,
                        'nom_de_naissance': nom_de_naissance,
                        'nom_d_usage': nom_d_usage,
                        'date_de_naissance': date_de_naissance,
                        'telephones': [],
                        'statut_legal': statut_legal,
                        'email': '',
                        'cles_de_federation': {},
                    })

        Entity.objects.bulk_create(generate())
|
||||
|
|
|
@ -2,9 +2,10 @@ import urlparse
|
|||
import copy
|
||||
|
||||
from django.core.urlresolvers import reverse
|
||||
from django.core.management import call_command
|
||||
|
||||
from zoo.zoo_nanterre.models import Duplicate
|
||||
from zoo.zoo_data.models import Log
|
||||
from zoo.zoo_data.models import Log, Entity
|
||||
|
||||
|
||||
def test_list_doublons(nanterre_classic_family, app):
|
||||
|
@ -156,3 +157,11 @@ def test_list_doublons(nanterre_classic_family, app):
|
|||
log = Log.objects.filter(entity_id=third_data['individu_2']['id']).latest('id')
|
||||
assert 'non doublon de' in log.content['text']
|
||||
assert log.content['meta']['form_id'] == 103
|
||||
|
||||
|
||||
def test_doublons_cmd(lot_of_names):
    """Run the rsu-duplicates management command and sanity-check its output.

    Fix: removed the committed debugging residue
    (``import pdb; pdb.set_trace()``), which would hang any CI run by
    dropping into an interactive debugger.
    """
    call_command('rsu-duplicates', 'find')
    # Heuristic upper bound: fewer than one duplicate per five individuals;
    # more than that would mean the matcher is far too eager on real-world
    # name data.
    assert Duplicate.objects.count() < (Entity.objects.filter(schema__slug='individu').count() / 5)
    # 'list' must at least run without raising.
    call_command('rsu-duplicates', 'list')
|
||||
|
|
|
@ -5,8 +5,6 @@ import json
|
|||
|
||||
import pytest
|
||||
|
||||
from zoo.zoo_data.models import Entity, Transaction
|
||||
|
||||
# Ces tests ne sont pas exécutables publiquement car ils nécessitent des
|
||||
# données personnelles pour être efficaces:
|
||||
# Pour générer ces données faire dans psql:
|
||||
|
@ -32,34 +30,6 @@ from zoo.zoo_data.models import Entity, Transaction
|
|||
# En l'absence de ces fichiers les tests sont ignorés.
|
||||
|
||||
|
||||
@pytest.fixture
def lot_of_names(rsu_schema):
    # Populate the RSU schema with individuals taken from a private CSV
    # export.  Each row holds: genre, statut_legal, prenoms,
    # nom_de_naissance, nom_d_usage, date_de_naissance.
    try:
        reader = csv.reader(open('tests/export-etat-civil.csv'))
    except IOError:
        # The export contains personal data and is not distributed with the
        # sources (see the comment block above); skip when it is absent.
        pytest.skip()

    def generate():
        # All created entities share a single creation transaction.
        tr = Transaction.objects.create()
        for genre, statut_legal, prenoms, nom_de_naissance, nom_d_usage, date_de_naissance in reader:
            yield Entity(
                schema=rsu_schema['individu'],
                created=tr,
                content={
                    'genre': genre,
                    'prenoms': prenoms,
                    'nom_de_naissance': nom_de_naissance,
                    'nom_d_usage': nom_d_usage,
                    'date_de_naissance': date_de_naissance,
                    'telephones': [],
                    'statut_legal': statut_legal,
                    'email': '',
                    'cles_de_federation': {},
                })

    # NOTE(review): the file handle opened above is never closed explicitly;
    # it is released only when the reader is garbage-collected.
    Entity.objects.bulk_create(generate())
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def basic_searches():
|
||||
try:
|
||||
|
|
|
@ -57,10 +57,7 @@ def find_duplicates(limit=None, base_limit=None, queryset=None, days=None, count
|
|||
continue
|
||||
# search for duplicate based on the name
|
||||
s = search.copy()
|
||||
if first.content['nom_de_naissance']:
|
||||
s = search.search_name(u'%s %s' % (first.content['prenoms'], first.content['nom_de_naissance']))
|
||||
if first.content['nom_d_usage']:
|
||||
s = s.search_name(u'%s %s' % (first.content['prenoms'], first.content['nom_d_usage']))
|
||||
s = s.search_individu(first)
|
||||
if first.content.get('date_de_naissance'):
|
||||
s = s.search_birthdate(
|
||||
datetime.datetime.strptime(
|
||||
|
|
|
@ -355,6 +355,53 @@ class PersonSearch(object):
|
|||
|
||||
return self.search_name(fullname)
|
||||
|
||||
    def search_individu(self, individu, factor=1.0, first_name_weight=1.0, last_name_weight=1.0):
        """Restrict the search to potential duplicates of *individu*.

        Builds a normalized-name filter and, for each present name field,
        registers trigram-similarity expressions (weighted mix of first-name
        and last-name similarity) used later for scoring.  Returns a new
        PersonSearch; the receiver is not mutated (works on a deepcopy).

        NOTE(review): ``factor`` is accepted but never used in this body —
        confirm whether it should feed into the similarity weighting.
        """
        # Work on a copy so chained search_*() calls stay side-effect free.
        self = copy.deepcopy(self)

        prenoms = individu.content['prenoms']
        nom_de_naissance = individu.content['nom_de_naissance']
        nom_d_usage = individu.content['nom_d_usage']

        # Create the simple filter: similarity = 1 - trigram distance between
        # the normalized stored field and the normalized query value.
        prenoms_expression = Value(1.0) - TrigramDistance(Normalize(
            JSONTextRef(F('content'), 'prenoms')), self.luv(prenoms))
        nn_nn_expression = Value(1.0) - TrigramDistance(Normalize(
            JSONTextRef(F('content'), 'nom_de_naissance')), self.luv(nom_de_naissance))
        nu_nu_expression = Value(1.0) - TrigramDistance(Normalize(
            JSONTextRef(F('content'), 'nom_d_usage')), self.luv(nom_d_usage))
        # Cross comparisons: birth name against usage name and vice versa, so
        # a person recorded under either field can still match.
        nn_nu_expression = Value(1.0) - TrigramDistance(Normalize(
            JSONTextRef(F('content'), 'nom_de_naissance')), self.luv(nom_d_usage))
        nu_nn_expression = Value(1.0) - TrigramDistance(Normalize(
            JSONTextRef(F('content'), 'nom_d_usage')), self.luv(nom_de_naissance))
        q_noms = []
        if nom_de_naissance:
            # Match the birth name against both stored name fields.
            q_noms.append(self.q_normalize('nom_de_naissance', nom_de_naissance))
            self.name_similarities.append(
                (first_name_weight * prenoms_expression
                 + last_name_weight * nn_nn_expression)
                / Value(first_name_weight + last_name_weight))
            self.name_similarities.append(
                (first_name_weight * prenoms_expression
                 + last_name_weight * nu_nn_expression)
                / Value(first_name_weight + last_name_weight))
            q_noms.append(self.q_normalize('nom_d_usage', nom_de_naissance))
        if nom_d_usage:
            # Match the usage name against both stored name fields.
            q_noms.append(self.q_normalize('nom_d_usage', nom_d_usage))
            q_noms.append(self.q_normalize('nom_de_naissance', nom_d_usage))
            self.name_similarities.append(
                (first_name_weight * prenoms_expression
                 + last_name_weight * nu_nu_expression)
                / Value(first_name_weight + last_name_weight))
            self.name_similarities.append(
                (first_name_weight * prenoms_expression
                 + last_name_weight * nn_nu_expression)
                / Value(first_name_weight + last_name_weight))
        # NOTE(review): if both names are empty, q_noms is empty and
        # reduce() raises TypeError — presumably individus always carry at
        # least one name; confirm upstream.
        q = self.q_normalize('prenoms', prenoms) & reduce(operator.__or__, q_noms)
        self.add_filter('name', q)

        # Compute similarity score
        return self
|
||||
|
||||
    def copy(self):
        """Return an independent deep copy of this search."""
        return copy.deepcopy(self)
|
||||
|
||||
|
|
Loading…
Reference in New Issue