# passerelle/passerelle/apps/api_entreprise/utils.py
#
# 56 lines
# 2.1 KiB
# Python

# passerelle - uniform access to multiple data sources and services
# Copyright (C) 2022 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from unicodedata import category
from unicodedata import normalize as unormalize
from Levenshtein import distance as ldistance
def normalize(s):
    """Return a comparison key for *s*.

    The string is decomposed with Unicode NFKD normalization and case-folded,
    then every character whose Unicode category is ``Mn`` (non-spacing mark,
    e.g. a combining accent) or ``Zs`` (space separator) is dropped.
    """
    decomposed = unormalize('NFKD', s)
    folded = decomposed.casefold()
    kept = []
    for char in folded:
        if category(char) in ('Mn', 'Zs'):
            # combining marks and spaces do not take part in the comparison
            continue
        kept.append(char)
    return ''.join(kept)
def simple_match(mandataire, first_name, last_name, birthdate):
    """Tell whether a mandataire record exactly matches the given identity.

    The first given name of the record (``prenom`` split on commas and
    whitespace) and its ``nom`` are compared to *first_name* and *last_name*
    after all of them went through ``normalize``; ``date_naissance`` with its
    dashes removed must be equal to *birthdate*.

    Returns ``False`` when one of the ``prenom``, ``nom`` or
    ``date_naissance`` keys is missing from *mandataire*, when ``prenom``
    contains no given name at all, or when any of the three comparisons
    fails; returns ``True`` otherwise.
    """
    if any(attr not in mandataire for attr in ('prenom', 'nom', 'date_naissance')):
        return False

    # "prenom" may list several given names separated by commas and/or
    # spaces; only the first one takes part in the comparison.
    given_names = mandataire['prenom'].replace(',', ' ').split(maxsplit=1)
    if not given_names:
        # Empty or blank "prenom": indexing [0] would raise IndexError, and
        # a record without a given name cannot match anyone.
        return False

    if normalize(given_names[0]) != normalize(first_name):
        return False
    if normalize(mandataire['nom']) != normalize(last_name):
        return False
    if mandataire['date_naissance'].replace('-', '') != birthdate:
        return False
    return True
def levenshtein_match(mandataire, first_name, last_name, birthdate):
    """Tell whether a mandataire record approximately matches an identity.

    The first given name of the record (``prenom`` split on commas and
    whitespace) and its ``nom`` are compared to *first_name* and *last_name*
    with a Levenshtein distance computed on their ``normalize``-d forms.
    The accepted distance for each name is a quarter of the length of the
    corresponding argument, rounded down and capped at 2.
    ``date_naissance`` with its dashes removed must be strictly equal to
    *birthdate*.

    Returns ``False`` when one of the ``prenom``, ``nom`` or
    ``date_naissance`` keys is missing from *mandataire*, when ``prenom``
    contains no given name at all, or when any of the three comparisons
    fails; returns ``True`` otherwise.
    """
    if any(attr not in mandataire for attr in ('prenom', 'nom', 'date_naissance')):
        return False

    # "prenom" may list several given names separated by commas and/or
    # spaces; only the first one takes part in the comparison.
    given_names = mandataire['prenom'].replace(',', ' ').split(maxsplit=1)
    if not given_names:
        # Empty or blank "prenom": indexing [0] would raise IndexError, and
        # a record without a given name cannot match anyone.
        return False

    # Tolerance grows with the length of the searched name: names shorter
    # than 4 characters must match exactly, and at most 2 edits are allowed.
    dist_first_name = min(2, len(first_name) // 4)
    dist_last_name = min(2, len(last_name) // 4)

    if ldistance(normalize(given_names[0]), normalize(first_name)) > dist_first_name:
        return False
    if ldistance(normalize(mandataire['nom']), normalize(last_name)) > dist_last_name:
        return False
    if mandataire['date_naissance'].replace('-', '') != birthdate:
        return False
    return True