56 lines
2.1 KiB
Python
56 lines
2.1 KiB
Python
# passerelle - uniform access to multiple data sources and services
# Copyright (C) 2022 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
from unicodedata import category
|
|
from unicodedata import normalize as unormalize
|
|
|
|
from Levenshtein import distance as ldistance
|
|
|
|
|
|
def normalize(s):
    """Return a simplified form of *s* suitable for name comparison.

    The string is decomposed with Unicode NFKD and case-folded, then every
    character whose Unicode category is ``Mn`` (non-spacing mark, such as a
    combining accent) or ``Zs`` (space separator) is dropped.
    """
    ignored_categories = ('Mn', 'Zs')
    folded = unormalize('NFKD', s).casefold()
    return ''.join(filter(lambda char: category(char) not in ignored_categories, folded))
|
|
|
|
|
|
def simple_match(mandataire, first_name, last_name, birthdate):
    """Tell whether *mandataire* exactly matches the given identity.

    The record matches when all of the following hold:

    * the first whitespace- or comma-separated token of its ``prenom``
      equals *first_name* once both went through ``normalize()``;
    * its ``nom`` equals *last_name* once both went through ``normalize()``;
    * its ``date_naissance`` with the dashes removed equals *birthdate*.

    Returns ``False`` instead of raising when the record lacks one of the
    three keys, holds ``None`` for one of them, or has a ``prenom`` that
    contains no token at all (empty, blank or comma-only).
    """
    required = ('prenom', 'nom', 'date_naissance')
    if any(attr not in mandataire or mandataire[attr] is None for attr in required):
        return False

    # Only the first token of ``prenom`` takes part in the comparison.
    given_names = mandataire['prenom'].replace(',', ' ').split(maxsplit=1)
    if not given_names:
        # Nothing to compare: indexing [0] here would raise IndexError.
        return False

    return (
        normalize(given_names[0]) == normalize(first_name)
        and normalize(mandataire['nom']) == normalize(last_name)
        and mandataire['date_naissance'].replace('-', '') == birthdate
    )
|
|
|
|
|
|
def levenshtein_match(mandataire, first_name, last_name, birthdate):
    """Tell whether *mandataire* approximately matches the given identity.

    Names are compared with the Levenshtein distance after ``normalize()``:

    * the first whitespace- or comma-separated token of ``prenom`` against
      *first_name*;
    * ``nom`` against *last_name*.

    Each name tolerates ``min(2, len(name) // 4)`` edits, where ``name`` is
    the raw *first_name* / *last_name* argument (not its normalized form).
    The birth date gets no tolerance: ``date_naissance`` with the dashes
    removed must equal *birthdate*.

    Returns ``False`` instead of raising when the record lacks one of the
    three keys, holds ``None`` for one of them, or has a ``prenom`` that
    contains no token at all (empty, blank or comma-only).
    """
    required = ('prenom', 'nom', 'date_naissance')
    if any(attr not in mandataire or mandataire[attr] is None for attr in required):
        return False

    # Allowed number of edits: one per four characters, capped at two.
    max_dist_first_name = min(2, len(first_name) // 4)
    max_dist_last_name = min(2, len(last_name) // 4)

    # Only the first token of ``prenom`` takes part in the comparison.
    given_names = mandataire['prenom'].replace(',', ' ').split(maxsplit=1)
    if not given_names:
        # Nothing to compare: indexing [0] here would raise IndexError.
        return False

    if ldistance(normalize(given_names[0]), normalize(first_name)) > max_dist_first_name:
        return False
    if ldistance(normalize(mandataire['nom']), normalize(last_name)) > max_dist_last_name:
        return False
    return mandataire['date_naissance'].replace('-', '') == birthdate
|