add cd06/senior scripts

Benjamin Dauvergne 2020-09-15 13:58:23 +02:00
parent 310ce4afad
commit 37489d412e
9 changed files with 381 additions and 0 deletions

cd06/senior/analyse.py Normal file

@@ -0,0 +1,7 @@
import sys

import loader

# quick scratch analysis: load the Eudonet CSV export passed as first argument
# and collect the non-empty mobile phone numbers
keys, data = loader.load(sys.argv[1])
mobiles = [row['Tel_Portable'] for row in data if row['Tel_Portable']]
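
A hypothetical follow-up on this scratch script (the Counter tally is an added illustration, not part of the commit):

from collections import Counter

# tally mobile numbers appearing on more than one row, e.g. spouses sharing a phone
duplicates = {number: count for number, count in Counter(mobiles).items() if count > 1}
print(f'{len(mobiles)} mobiles, {len(duplicates)} appear more than once')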


@@ -0,0 +1,154 @@
import sys

import loader
from django.contrib.contenttypes.models import ContentType
from django.db.transaction import atomic

from authentic2.a2_rbac.models import OrganizationalUnit as OU
from authentic2.custom_user.models import User
from authentic2.models import UserExternalId, Attribute, AttributeValue
user_ct = ContentType.objects.get_for_model(User)

# map Eudonet CSV columns to authentic2 attribute names
user_attributes_mapping = {
    'Prenom': 'first_name',
    'Nom': 'last_name',
    'Nom_JF': 'nom_de_naissance',
    'Civilite': 'title',
    'birthdate': 'birthdate',
    'Lieu_Naissance': 'lieu_de_naissance',
    'tel_mobile': 'mobile',
    'tel_fixe': 'phone',
    'NO_Voie': 'address',
    'Batiment_Residence': 'complement_d_adresse',
    'CP': 'zipcode',
    'Ville': 'city',
}

attributes = {
    attribute.name: attribute
    for attribute in Attribute.objects.filter(name__in=user_attributes_mapping.values())
}
# every mapped attribute must already be declared in authentic2
assert set(attributes.keys()) == set(user_attributes_mapping.values())

logger = loader.logger

keys, data = loader.load(sys.argv[1])
with atomic():
    senior_ou, created = OU.objects.update_or_create(slug='senior', defaults={'name': 'Sénior'})

    guids = {row['guid']: row for row in data}
    user_by_guid = {user.uuid: user for user in User.objects.filter(uuid__in=guids.keys())}

    to_bulk_create = []
    to_save = []
    logger.info('Creating users...')
    for row in data:
        guid = row['guid']
        user = user_by_guid.get(guid, User(uuid=guid))
        save = not bool(user.pk)
        defaults = {
            'ou': senior_ou,
            'first_name': row['Prenom'],
            'last_name': row['Nom'],
            'email': row['email'] or '',
            # only mark the email verified when it passed validation in loader.py
            'email_verified': bool(row['email']),
        }
        # only touch users whose fields actually changed
        for key, value in defaults.items():
            if getattr(user, key) != value:
                setattr(user, key, value)
                save = True
        if save:
            if user.pk:
                to_save.append(user)
            else:
                to_bulk_create.append(user)
    User.objects.bulk_create(to_bulk_create)
    for user in to_save:
        user.save()
    # reload to get primary keys of the freshly bulk-created users
    user_by_guid = {user.uuid: user for user in User.objects.filter(uuid__in=guids.keys())}
    user_ids = set(user.pk for user in user_by_guid.values())
    external_id_by_user = {uei.user_id: uei for uei in UserExternalId.objects.filter(user_id__in=user_ids)}
    logger.info('Created %s users...', len(to_bulk_create))
    logger.info('Updated %s users...', len(to_save))
    logger.info('Creating UserExternalId...')
    to_bulk_create = []
    to_save = []
    for user in user_by_guid.values():
        ppid = guids[user.uuid]['ppid']
        uei = external_id_by_user.get(user.pk, UserExternalId(user=user, source='eudonet'))
        save = not bool(uei.pk)
        if uei.source != 'eudonet':
            uei.source = 'eudonet'
            save = True
        if uei.external_id != ppid:
            uei.external_id = ppid
            save = True
        if save:
            if uei.pk:
                to_save.append(uei)
            else:
                to_bulk_create.append(uei)
    UserExternalId.objects.bulk_create(to_bulk_create)
    for uei in to_save:
        uei.save()
    logger.info('Created %s user external ids...', len(to_bulk_create))
    logger.info('Updated %s user external ids...', len(to_save))
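    # with these records in place, a user can later be resolved from its Eudonet id,
    # e.g. (illustrative query, not used by this script):
    #   UserExternalId.objects.get(source='eudonet', external_id=some_ppid).user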
    for eudonet_key, attribute_name in user_attributes_mapping.items():
        attribute = attributes[attribute_name]
        serialize = attribute.get_kind()['serialize']
        to_bulk_create = []
        to_save = []
        to_delete = []
        logger.info('Creating attributes %s...', attribute_name)
        atvs = {atv.object_id: atv for atv in AttributeValue.objects.filter(object_id__in=user_ids, attribute=attribute)}
        for row in data:
            user = user_by_guid[row['guid']]
            value = row[eudonet_key]
            atv = atvs.get(user.pk, AttributeValue(
                content_type=user_ct,
                object_id=user.pk,
                attribute=attribute,
                verified=False,
                multiple=False))
            if not value:
                # empty source value: drop any existing attribute value
                if atv.pk:
                    to_delete.append(atv.pk)
            else:
                serialized = serialize(value)
                if not atv.pk:
                    atv.content = serialized
                    to_bulk_create.append(atv)
                elif atv.content != serialized:
                    atv.content = serialized
                    to_save.append(atv)
        AttributeValue.objects.bulk_create(to_bulk_create)
        for atv in to_save:
            atv.save()
        AttributeValue.objects.filter(pk__in=to_delete).delete()
        logger.info('Created %s %s attributes ...', len(to_bulk_create), attribute_name)
        logger.info('Updated %s %s attributes ...', len(to_save), attribute_name)
        logger.info('Deleted %s %s attributes ...', len(to_delete), attribute_name)
    # unless 'nofake' is passed as second argument, abort the transaction so
    # the whole import is rolled back (dry-run by default)
    if len(sys.argv) < 3 or sys.argv[2] != 'nofake':
        raise ValueError('fake')
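
This final ValueError is the script's dry-run switch: since every write happens inside atomic(), raising before the block exits rolls the whole import back. A minimal standalone sketch of the same pattern (names here are illustrative, not from this commit):

from django.db.transaction import atomic

class DryRunAbort(Exception):
    pass

def run_import(apply_changes=False):
    try:
        with atomic():
            ...  # all database writes go here
            if not apply_changes:
                raise DryRunAbort  # trigger a rollback; nothing is committed
    except DryRunAbort:
        pass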

cd06/senior/loader.py Normal file

@@ -0,0 +1,220 @@
import csv
import datetime
import hashlib
import logging
import re
import uuid

logger = logging.getLogger('eudonet')
logger.propagate = False
handler = logging.StreamHandler()
handler.setLevel(logging.WARNING)
handler.setFormatter(
    logging.Formatter('%(asctime)-15s %(levelname)s %(message)s'))
logger.addHandler(handler)
# Accepte_Doc
# Adr_Personnelle
# Adresse_Principale
# Annee_Inscription
# Annotations_particulières2
# Archivé
# Autorise_Photos
# Batiment_Residence
# CP
# Certificat_Medical
# Civilite
# Date_Naissance
# Email
# Habite_Pas_Adresse_Indiquee
# ID_Conjoint
# Lien_relationnel
# Lieu_Inscription
# Lieu_Naissance
# NO_A_Contacter
# NO_Voie
# Nom
# Nom_Conjoint
# Nom_JF
# Personne_A_Contacter
# Prenom
# Prenom_Conjoint
# Profil_Contact
# Tel_Portable
# Téléphone_fixe
# URL_Historique_actvites
# Ville
# ppid
expected_keys = set([
    'ppid',
    'Nom',
    'Nom_JF',
    'Prenom',
    'Civilite',
    'Date_Naissance',
    'Lieu_Naissance',
    'ID_Conjoint',
    'Nom_Conjoint',
    'Prenom_Conjoint',
    'Tel_Portable',
    'Autorise_Photos',
    'Annee_Inscription',
    'Lieu_Inscription',
    'Accepte_Doc',
    'Certificat_Medical',
    'Personne_A_Contacter',
    'NO_A_Contacter',
    'Lien_relationnel',
    'Annotations_particulières2',
    'Profil_Contact',
    'Archivé',
    'Téléphone_fixe',
    'NO_Voie',
    'Batiment_Residence',
    'CP',
    'Ville',
    'Email',
    'Adr_Personnelle',
    'Adresse_Principale',
    'Habite_Pas_Adresse_Indiquee',
    'URL_Historique_actvites',
])
salt = b'eudonet'

uuids = set()


def telephone(row, key):
    # normalize a phone number column: strip separators, check for a 9 or 10
    # digit number; a 9-digit number is assumed to have lost its leading 0
    mobile = row[key]
    if mobile:
        mobile = mobile.strip()
        if mobile == 'NULL':
            mobile = ''
        else:
            mobile = re.sub(r'[\s.-]', '', mobile).strip()
            if not mobile.isascii() or not mobile.isdigit() or len(mobile) not in (9, 10):
                logger.warning(f'line {row["line"]} : invalid {key} {row[key]}')
                mobile = ''
            if len(mobile) == 9:
                mobile = '0' + mobile
    return mobile
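# for example (illustrative values):
#   telephone({'line': 1, 'Tel_Portable': '06 12.34-56 78'}, 'Tel_Portable') == '0612345678'
#   a 9-digit '612345678' gains its leading 0; anything else logs a warning and yields ''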
ppids = set()


def normalize(row, ppid_index):
    row['source'] = 'eudonet'
    # convert NULL to None, strip everything else
    for key in row:
        if row[key] == 'NULL':
            row[key] = None
        elif key != 'line':
            row[key] = row[key].strip()
    row['import'] = True
    line = row['line']

    assert row['ppid'], 'no ppid line %s' % line
    assert row['Prenom'], 'no Prenom line %s' % line
    assert row['Nom'], 'no Nom line %s' % line
    assert row['ppid'] not in ppids, 'duplicate ppid line %s' % line
    ppids.add(row['ppid'])
    assert row['Civilite'] in (None, 'Monsieur', 'Madame')

    # email
    email = row['Email']
    if email:
        if not re.match(r'^[a-zA-Z0-9_.-]+@[a-zA-Z0-9.-]+$', email):
            logger.warning(f'line {line} : invalid Email {row["Email"]}')
            email = None
    row['email'] = email

    # Archivé: archived rows are reported but not imported
    if row['Archivé'] == 'OUI':
        logger.warning(f'line {line} Archivé==OUI')
        row['import'] = False

    # UUID: derive a stable user uuid from the salted ppid
    # (an md5 hexdigest is exactly 32 hex chars, i.e. a full UUID)
    guid = uuid.UUID(hashlib.md5(salt + row['ppid'].encode()).hexdigest()[:32]).hex
    assert guid not in uuids, f'uuid duplicate {guid}'
    uuids.add(guid)
    row['guid'] = guid
    logger.debug('uuid %s', guid)

    # Tel_Portable
    row['tel_mobile'] = telephone(row, 'Tel_Portable')
    row['tel_fixe'] = telephone(row, 'Téléphone_fixe')
    # NO_A_Contacter cannot be fixed
    row['tel_a_contacter'] = (row['NO_A_Contacter'] or '').strip() or None

    # ID_Conjoint: resolve the spouse's ppid to its row index
    id_conjoint = (row['ID_Conjoint'] or '').strip()
    if id_conjoint:
        id_conjoint = ppid_index.get(id_conjoint)
        # compare to None: index 0 is a valid row index
        if id_conjoint is None:
            logger.warning(f'line {line} : unknown ID_Conjoint {row["ID_Conjoint"]}')
    row['conjoint_index'] = id_conjoint

    # Date_Naissance: accept both French and ISO formats
    birthdate = row['Date_Naissance']
    if birthdate:
        birthdate = birthdate.strip()
        try:
            birthdate = datetime.datetime.strptime(birthdate, '%d/%m/%Y').date()
        except ValueError:
            try:
                birthdate = datetime.datetime.strptime(birthdate, '%Y-%m-%d').date()
            except ValueError:
                logger.warning(f'line {line} : invalid Date_Naissance {row["Date_Naissance"]}')
                # discard unparseable dates instead of keeping the raw string
                birthdate = None
    row['birthdate'] = birthdate

    # convert Accepte_Doc to three fields
    accepte_doc = row['Accepte_Doc']
    row['canaux_contact'] = []
    row['non_envoie_brochure'] = False
    row['adresse_erreur'] = False
    if accepte_doc is None:
        pass
    elif accepte_doc == 'NON':
        pass
    elif accepte_doc == 'Brochure au conjoint':
        row['canaux_contact'] = ['courrier']
        row['non_envoie_brochure'] = True
    elif accepte_doc == 'Par courrier':
        row['canaux_contact'] = ['courrier']
    elif accepte_doc == 'Par email':
        row['canaux_contact'] = ['email']
    elif accepte_doc == 'Erreur adresse':
        row['canaux_contact'] = ['courrier']
    else:
        raise ValueError('invalid Accepte_Doc %r' % accepte_doc)
def load(filename):
    logger.info(f'Loading rows of {filename}')
    with open(filename) as fd:
        reader = csv.DictReader(fd)
        data = list(reader)
    ppid_index = {row['ppid'].strip(): i for i, row in enumerate(data)}
    for i, row in enumerate(data):
        assert set(row.keys()) == expected_keys, f'row {i + 1} keys differ: {row.keys()} != {expected_keys}'
    error = False
    for i, row in enumerate(data):
        row['line'] = (i + 1)
        logger.info(f'Loading row {i + 1:05d}')
        try:
            normalize(row, ppid_index)
        except ValueError:
            logger.exception(f'line {i + 1} : normalization failed')
            error = True
    # fail late so every bad row is reported before aborting
    assert not error
    # only rows not excluded by normalize() (e.g. Archivé) are returned
    return reader.fieldnames, [row for row in data if row['import']]
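
A hypothetical interactive check of the loader (the CSV path is illustrative; the guid determinism follows from the salted md5 in normalize()):

import loader

# the same ppid always hashes to the same guid, so re-running the import
# updates existing users instead of duplicating them
keys, rows = loader.load('export.csv')  # illustrative path
print(len(rows), 'importable rows')
print(rows[0]['guid'], rows[0]['tel_mobile'], rows[0]['birthdate'])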