nanterre: ajout d'un mode simulé et d'un rapport à la commande d'import

This commit is contained in:
Benjamin Dauvergne 2017-07-26 14:12:20 +02:00 committed by Thomas NOEL
parent a043516427
commit 3648411eea
3 changed files with 214 additions and 41 deletions

View File

@ -13,4 +13,7 @@ import:
createdb nanterre_rsu
psql nanterre_rsu <nrsu.sql
psql nanterre_rsu <swarm_nanterre.sql
time ./manage.py rsu-load-dump "dbname=nanterre_rsu" authentic_users.json
make -C load
load:
time ./manage.py rsu-load-dump --verbosity=2 --dry "dbname=nanterre_rsu" authentic_users.json

View File

@ -22,7 +22,8 @@ from zoo.zoo_nanterre.utils import LoadDump
class Command(BaseCommand):
def handle(self, pg_dsn, authentic_fixture_path, **options):
LoadDump(pg_dsn, authentic_fixture_path, verbosity=options['verbosity']).load()
LoadDump(pg_dsn, authentic_fixture_path, verbosity=options['verbosity'],
dry=options['dry']).load()
def add_arguments(self, parser):
parser.add_argument('pg_dsn',
@ -30,3 +31,7 @@ class Command(BaseCommand):
'dbname=nanterre_rsu')
parser.add_argument('authentic_fixture_path',
help='path for the authentic fixture containing existing SWARM users')
parser.add_argument('--dry',
action='store_true',
default=False,
help='simulate')

View File

@ -19,6 +19,7 @@
from __future__ import print_function
import six
import functools
import uuid
import sys
import re
@ -39,6 +40,7 @@ from django.db.models import Q, F, Value
from django.db.models.functions import Least, Greatest, Coalesce, Concat
from django.db import transaction
from django.utils.timezone import now, make_aware
from django.contrib.auth.hashers import make_password
from zoo.zoo_meta.models import EntitySchema, RelationSchema
from zoo.zoo_data.models import Entity, Relation, Transaction, Log
@ -554,6 +556,10 @@ class UserFixture(object):
def write_user(self, swarmid, username, password, first_name, last_name, email, last_login):
uid = 'swarmid-%s' % swarmid
date_joined = now().isoformat()
if password:
password = 'bcrypt$' + password
else:
password = make_password(uuid.uuid4().hex)
d = {
'model': 'custom_user.user',
'fields': {
@ -563,7 +569,7 @@ class UserFixture(object):
'last_login': make_aware(last_login).isoformat(),
'username': username,
'uuid': uid,
'password': 'bcrypt$' + password,
'password': password,
'modified': date_joined,
'date_joined': date_joined,
}
@ -582,9 +588,14 @@ class UserFixture(object):
psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY)
# Control-flow exception used by LoadDump.load(): it is raised inside the
# `with transaction.atomic()` block when `self.dry` is set and caught right
# after it, so the import work is rolled back instead of being committed.
class DryLoad(Exception):
pass
class LoadDump(object):
def __init__(self, pg_dsn, authentic_fixture_path, verbosity=1):
EMAIL_RE = re.compile(r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)")
def __init__(self, pg_dsn, authentic_fixture_path, verbosity=1, dry=False):
self.authentic_fixture_path = authentic_fixture_path
self.connection = psycopg2.connect(dsn=pg_dsn)
self.cursor = self.connection.cursor()
@ -592,6 +603,9 @@ class LoadDump(object):
self.tr = Transaction.objects.create(meta="initial import")
self.clock_stack = []
self.verbosity = verbosity
self.dry = dry
for cause in ['RG1', 'RG2', 'RG4', 'RG3']:
self.counters['individus.ignores.%s' % cause] = 0
def start(self, msg=None):
if msg and self.verbosity > 0:
@ -613,28 +627,57 @@ class LoadDump(object):
return decorator
# Entry point of the import: reset the database, load individuals, swarm
# users, addresses and relations, then (when verbosity > 1) print a report
# of every counter plus the SAGA sequence value.
# NOTE(review): this listing is a diff whose +/- markers were stripped; the
# `@transaction.atomic` decorator and the five calls directly below `def`
# look like the pre-commit body superseded by the try/with block - confirm
# against the repository.
@log()
@transaction.atomic
def load(self):
self.reset_db()
self.load_swarm()
self.load_individuals()
self.load_addresses()
self.load_relations()
try:
with transaction.atomic():
self.reset_db()
# individuals are loaded before swarm users: load_swarm() skips every user
# whose id is not in self.swarmids, which load_individuals() fills
self.load_individuals()
self.load_swarm()
self.load_addresses()
self.load_relations()
# dry run: leaving the atomic block through an exception rolls it back
if self.dry:
raise DryLoad
except DryLoad:
# expected in dry mode, nothing to report
pass
# print the import report: one line per counter, sorted by key
if self.verbosity > 1:
print()
print('Rapport d\'import :')
print()
for key in sorted(self.counters):
print(' %-80s' % key, ':', '%10d' % self.counters[key])
print()
print(' Compteur de séquence SAGA :', self.saga_sequence)
@log('Resetting db... ')
def reset_db(self):
# Empty the Relation and Entity tables before an import.
# NOTE(review): this listing is a diff whose +/- markers were stripped; the
# two unconditional deletes just below look like the pre-commit body that
# the guarded version replaces - confirm against the repository.
Relation.objects.all().delete()
Entity.objects.all().delete()
if not self.dry:
# no need to clean up when the changes are not going to be kept (dry run)
Relation.objects.all().delete()
Entity.objects.all().delete()
@log('Loading swarm users... ')
def load_swarm(self):
user_fixture = UserFixture(self.authentic_fixture_path)
self.cursor.execute("""SELECT id, username, password, firstname, lastname, email, last_login FROM users
WHERE status = 'active'""")
self.swarmid_mapping = swarmid_mapping = {}
self.cursor.execute('''SELECT id, username, password, firstname, lastname, email,
last_login, status FROM users''')
for (swarmid, username, password, first_name, last_name, email,
last_login) in self.cursor.fetchall():
uid = user_fixture.write_user(
last_login, status) in self.cursor.fetchall():
self.counters['swarmids'] += 1
self.counters['swarmids.%s' % status] += 1
if swarmid not in self.swarmids:
self.counters['swarmids.ignores.individu_ignore'] += 1
continue
if status == 'suspended':
# RG14: on ne garde pas les mots de passe des comptes suspendus
password = None
# RG14: on ne garde pas les comptes inactifs
if status == 'inactive':
self.counters['swarmids.ignores.inactive'] += 1
continue
self.counters['swarmids.importes'] += 1
self.counters['swarmids.importes.%s' % status] += 1
user_fixture.write_user(
swarmid=swarmid,
username=username,
password=password,
@ -642,22 +685,40 @@ class LoadDump(object):
last_name=last_name,
email=email,
last_login=last_login)
swarmid_mapping[swarmid] = uid
user_fixture.close()
# Count one skipped individual: bump the global 'individus.ignores' counter
# and the per-rule one ('individus.ignores.<cause>', e.g. cause='RG1').
# `individualid` is accepted but not used by this method.
def ignore_individu(self, cause, individualid):
self.counters['individus.ignores'] += 1
self.counters['individus.ignores.%s' % cause] += 1
@log('Loading individual... ')
def load_individuals(self):
self.cursor.execute('''SELECT id, swarmid, gender, firstname, lastname, nameofuse, email, phones::json,
legalstatus, birthdate, mappings::json
FROM individual''')
sql = '''
WITH agg AS (SELECT ind.id as id, array_agg(ir1.relationtype) as relt1,
array_agg(ir2.relationtype) as relt2,
array_agg(users.id) as swarmids
FROM individual as ind
LEFT JOIN individualrelation as ir1 ON ir1.subjectid = ind.id
LEFT JOIN individualrelation as ir2 ON ir2.responsibleid = ind.id
LEFT JOIN users ON users.id = ind.swarmid GROUP BY ind.id)
SELECT DISTINCT ON (ind.id) ind.id, ind.state, ind.swarmid, ind.gender, ind.firstname, ind.lastname,
ind.nameofuse, ind.email, ind.phones::json,
ind.legalstatus, ind.birthdate, ind.mappings::json,
agg.relt1, agg.relt2, agg.swarmids
FROM individual as ind, agg WHERE ind.id = agg.id'''
self.cursor.execute(sql)
individu_batch = []
individu_schema = EntitySchema.objects.get(slug='individu')
self.individu_mapping = individu_mapping = {}
self.swarmids = set()
for (individualid, state, swarmid, gender, firstname, lastname, nameofuse, email, phones,
legalstatus, birthdate, mappings, subject_relations, actor_relations, swarmids) in self.cursor.fetchall():
# comptage des individus
self.counters['individus'] += 1
for (individualid, swarmid, gender, firstname, lastname, nameofuse, email, phones,
legalstatus, birthdate, mappings) in self.cursor.fetchall():
if gender == 'Female':
genre = 'femme'
elif gender == 'Male':
@ -676,6 +737,14 @@ class LoadDump(object):
else:
raise NotImplementedError('unknown legalstatus: %s' % legalstatus)
self.counters['individus.%s' % statut_legal] += 1
self.counters['individus.%s.%s' % (statut_legal, genre)] += 1
if email:
self.counters['emails'] += 1
# Count federation keys per key name. `mappings` can still be None here (it
# is only defaulted to {} further down in this loop body), so guard the
# iteration instead of raising TypeError on a NULL column.
for key in mappings or {}:
    self.counters['cles_de_federation.%s' % key] += 1
telephones = []
for phone in phones:
if phone['phoneType'] == 'OtherPhone':
@ -691,22 +760,85 @@ class LoadDump(object):
telephones.append({'type': kind, 'numero': phone['number']})
mappings = mappings or {}
if swarmid in self.swarmid_mapping:
mappings['authentic'] = self.swarmid_mapping[swarmid]
if swarmid and swarmids:
mappings['authentic'] = 'swarmid-%s' % swarmid
# RG8, RG9, RG9bis: carry over the birth name and the name of use
# both names are present: keep both
if lastname and nameofuse:
    nom_de_naissance = lastname.upper()
    nom_d_usage = nameofuse.upper()
# children and men: the name of use is taken as the birth name
elif statut_legal == 'mineur' or genre == 'homme':
    nom_de_naissance = nameofuse.upper()
    nom_d_usage = u''
# women: only the name of use is kept, the birth name is left empty
else:
    # This must be an assignment: the former `nom_de_naissance == ''` was a
    # no-op comparison, leaving the variable unbound on the first row or
    # carrying over the previous individual's birth name.
    nom_de_naissance = u''
    nom_d_usage = nameofuse.upper()
content = {
'genre': genre,
'prenoms': firstname.upper(),
'nom_de_naissance': lastname.upper() if lastname else '',
'nom_d_usage': nameofuse.upper(),
'nom_de_naissance': nom_de_naissance,
'nom_d_usage': nom_d_usage,
'statut_legal': statut_legal,
'date_de_naissance': birthdate.isoformat(),
'cles_de_federation': mappings,
}
if not mappings and not subject_relations and not actor_relations:
# individu ignoré RG1: pas de clés, pas de réseau (pas de relations)
self.ignore_individu('RG1', individualid)
continue
if not mappings:
new_cursor = self.connection.cursor()
new_cursor.execute('''
SELECT DISTINCT(ind.id), ind.mappings::jsonb
FROM individual as ind
INNER JOIN individualrelation as ir ON
(ind.id = ir.subjectid AND ir.responsibleid = %s) OR
(ind.id = ir.responsibleid AND ir.subjectid = %s)''',
(individualid, individualid))
no_mappings = all(not reseau_mappings for reseau_id, reseau_mappings in
new_cursor.fetchall())
if no_mappings:
# individu ignoré RG2: pas de clés, et réseau sans clés
self.ignore_individu('RG2', individualid)
continue
if state == 'invalid':
# individual skipped, RG3: invalid record
self.ignore_individu('RG3', individualid)
continue
if state == 'invisible' and not mappings:
# individual skipped, RG4: invisible record without any federation key
self.ignore_individu('RG4', individualid)
continue
if state == 'archived':
# individual skipped, RG5: archived record
self.ignore_individu('RG5', individualid)
continue
if state == 'temp':
# individual skipped, RG6: record in the 'temp' state
self.ignore_individu('RG6', individualid)
continue
if telephones:
content['telephones'] = telephones
if email:
content['email'] = email
e = Entity(id=individualid,
self.counters['emails_importables'] += 1
email = email.strip()
if self.EMAIL_RE.match(email):
content['email'] = email
else: # RG11: on ignore les emails mal formattée
self.counters['emails_importables.ignores.RG11'] += 1
self.counters['individus_importes.%s.%s' % (statut_legal, genre)] += 1
for key in mappings:
self.counters['cles_de_federation.importes'] += 1
self.counters['cles_de_federation.importes.%s' % key] += 1
# enregistre les swarmid à importer
self.swarmids.add(swarmid)
new_id = individualid
if self.dry:
new_id = None
e = Entity(id=new_id,
created=self.tr,
schema=individu_schema,
content=content)
@ -717,14 +849,20 @@ class LoadDump(object):
max_tiers_saga = 0
for tiers_saga, individualid in self.cursor.fetchall():
self.counters['cles_de_federation.saga_tiers'] += 1
if individualid not in individu_mapping:
continue
self.counters['cles_de_federation.importes'] += 1
self.counters['cles_de_federation.importes.saga_tiers'] += 1
cles = individu_mapping[individualid].content['cles_de_federation']
max_tiers_saga = max(max_tiers_saga, int(str(tiers_saga[1:])))
max_tiers_saga = max(max_tiers_saga, int(str(tiers_saga[2:])))
cles['saga_tiers'] = str(tiers_saga)
Entity.objects.bulk_create(individu_batch)
connection.cursor().execute("SELECT setval(pg_get_serial_sequence('zoo_data_entity', 'id'),"
" coalesce(max(id),0) + 1, false) FROM zoo_data_entity")
set_saga_sequence(max_tiers_saga + 1)
self.saga_sequence = max_tiers_saga + 1
set_saga_sequence(self.saga_sequence)
@log('Loading addresses... ')
def load_addresses(self):
@ -758,11 +896,22 @@ class LoadDump(object):
ia.addressid = a.id''')
for individualid, addressid, is_primary, streetnumber, streetnumberext, streetname, streetmatriculation, \
ext1, ext2, at, city, zipcode, country, inseecode in self.cursor.fetchall():
if individualid not in self.individu_mapping:
continue
self.counters['relations_adresse'] += 1
# RG10 bis: on retire le préfixe chez
if at and at.lower().strip().startswith('chez'):
at = at.strip()[4:].strip()
if addressid not in adresse_mapping:
self.counters['adresses'] += 1
if individualid not in self.individu_mapping:
self.counters['relations_adresse.ignores'] += 1
self.counters['relations_adresse.ignores.individu_ignore'] += 1
continue
if streetnumber:
streetnumber = streetnumber.strip()
content = {
'streetnumber': streetnumber,
# RG10: initialisation du numéro de rue à zéro si vide
'streetnumber': streetnumber or '0',
'streetnumberext': streetnumberext,
'streetname': streetname,
'streetmatriculation': streetmatriculation,
@ -777,8 +926,13 @@ class LoadDump(object):
e = Entity(created=self.tr, schema=adresse_schema, content=content)
adresse_batch.append(e)
adresse_mapping[addressid] = e
if individualid not in self.individu_mapping:
self.counters['relations_adresse.ignores.individu_ignore'] += 1
continue
self.counters['relations_adresse.importes'] += 1
individu_adresse_mapping[(individualid, addressid)] = is_primary
self.counters['adresses.importes'] = len(adresse_mapping)
Entity.objects.bulk_create(adresse_batch)
relation_batch = []
@ -811,14 +965,12 @@ class LoadDump(object):
relation_batch = []
seen = set()
for label, relationtype, responsibleid, subjectid in self.cursor.fetchall():
self.counters['relations'] += 1
self.counters['relations.%s' % relationtype] += 1
self.counters['relations.%s.%s' % (relationtype, label)] += 1
if relationtype == 'SituationFamiliale':
key = (min(responsibleid, subjectid), max(responsibleid, subjectid))
if key in seen:
continue
seen.add(key)
schema = union_schema
if label == 'Marie':
kind = 'pacs/mariage'
@ -839,11 +991,24 @@ class LoadDump(object):
elif label == 'RepresentantPersonneMoraleQualifiee':
kind = 'representant_personne_morale_qualifiee'
elif label == 'Tuteur':
kind = 'tuteur'
# conversion tutelle en tiers de confiance
self.counters['relations_modifies.RG7bis'] += 1
kind = 'tiers_de_confiance'
elif label == 'Curatelle':
# RG7: 'Curatelle' relations are not imported, only counted. The key has no
# conversion specifier, so it must not be %-formatted: the former
# `'relations.ignores.RG7' % relationtype` raised TypeError.
self.counters['relations.ignores.RG7'] += 1
continue
else:
raise NotImplementedError('unknown label for relationtype: %s, %s'
% (label, relationtype))
content = {'statut': kind}
if responsibleid not in self.individu_mapping or subjectid not in self.individu_mapping:
self.counters['relations.ignores.individu_ignore'] += 1
continue
if relationtype == 'ResponsabiliteLegale' and label == 'Tuteur':
self.counters['relations_modifies.RG7bis.importes'] += 1
self.counters['relations_importes'] += 1
self.counters['relations_importes.%s' % relationtype] += 1
self.counters['relations_importes.%s.%s' % (relationtype, kind)] += 1
e = Relation(created=self.tr, schema=schema, left=self.individu_mapping[responsibleid],
right=self.individu_mapping[subjectid], content=content)
relation_batch.append(e)