nanterre: ajout d'un mode simulé et d'un rapport à la commande d'import

This commit is contained in:
Benjamin Dauvergne 2017-07-26 14:12:20 +02:00 committed by Thomas NOEL
parent a043516427
commit 3648411eea
3 changed files with 214 additions and 41 deletions

View File

@ -13,4 +13,7 @@ import:
createdb nanterre_rsu createdb nanterre_rsu
psql nanterre_rsu <nrsu.sql psql nanterre_rsu <nrsu.sql
psql nanterre_rsu <swarm_nanterre.sql psql nanterre_rsu <swarm_nanterre.sql
time ./manage.py rsu-load-dump "dbname=nanterre_rsu" authentic_users.json make -C load
load:
time ./manage.py rsu-load-dump --verbosity=2 --dry "dbname=nanterre_rsu" authentic_users.json

View File

@ -22,7 +22,8 @@ from zoo.zoo_nanterre.utils import LoadDump
class Command(BaseCommand): class Command(BaseCommand):
def handle(self, pg_dsn, authentic_fixture_path, **options): def handle(self, pg_dsn, authentic_fixture_path, **options):
LoadDump(pg_dsn, authentic_fixture_path, verbosity=options['verbosity']).load() LoadDump(pg_dsn, authentic_fixture_path, verbosity=options['verbosity'],
dry=options['dry']).load()
def add_arguments(self, parser): def add_arguments(self, parser):
parser.add_argument('pg_dsn', parser.add_argument('pg_dsn',
@ -30,3 +31,7 @@ class Command(BaseCommand):
'dbname=nanterre_rsu') 'dbname=nanterre_rsu')
parser.add_argument('authentic_fixture_path', parser.add_argument('authentic_fixture_path',
help='path for the authentic fixture containing existing SWARM users') help='path for the authentic fixture containing existing SWARM users')
parser.add_argument('--dry',
action='store_true',
default=False,
help='simulate')

View File

@ -19,6 +19,7 @@
from __future__ import print_function from __future__ import print_function
import six import six
import functools import functools
import uuid
import sys import sys
import re import re
@ -39,6 +40,7 @@ from django.db.models import Q, F, Value
from django.db.models.functions import Least, Greatest, Coalesce, Concat from django.db.models.functions import Least, Greatest, Coalesce, Concat
from django.db import transaction from django.db import transaction
from django.utils.timezone import now, make_aware from django.utils.timezone import now, make_aware
from django.contrib.auth.hashers import make_password
from zoo.zoo_meta.models import EntitySchema, RelationSchema from zoo.zoo_meta.models import EntitySchema, RelationSchema
from zoo.zoo_data.models import Entity, Relation, Transaction, Log from zoo.zoo_data.models import Entity, Relation, Transaction, Log
@ -554,6 +556,10 @@ class UserFixture(object):
def write_user(self, swarmid, username, password, first_name, last_name, email, last_login): def write_user(self, swarmid, username, password, first_name, last_name, email, last_login):
uid = 'swarmid-%s' % swarmid uid = 'swarmid-%s' % swarmid
date_joined = now().isoformat() date_joined = now().isoformat()
if password:
password = 'bcrypt$' + password
else:
password = make_password(uuid.uuid4().hex)
d = { d = {
'model': 'custom_user.user', 'model': 'custom_user.user',
'fields': { 'fields': {
@ -563,7 +569,7 @@ class UserFixture(object):
'last_login': make_aware(last_login).isoformat(), 'last_login': make_aware(last_login).isoformat(),
'username': username, 'username': username,
'uuid': uid, 'uuid': uid,
'password': 'bcrypt$' + password, 'password': password,
'modified': date_joined, 'modified': date_joined,
'date_joined': date_joined, 'date_joined': date_joined,
} }
@ -582,9 +588,14 @@ class UserFixture(object):
psycopg2.extensions.register_type(psycopg2.extensions.UNICODE) psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY) psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY)
class DryLoad(Exception):
    """Signal raised by ``LoadDump.load`` at the end of a dry (simulated) run.

    It is raised from inside the ``transaction.atomic()`` block and caught
    right after it, so the block exits through an exception.
    """
class LoadDump(object): class LoadDump(object):
def __init__(self, pg_dsn, authentic_fixture_path, verbosity=1): EMAIL_RE = re.compile(r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)")
def __init__(self, pg_dsn, authentic_fixture_path, verbosity=1, dry=False):
self.authentic_fixture_path = authentic_fixture_path self.authentic_fixture_path = authentic_fixture_path
self.connection = psycopg2.connect(dsn=pg_dsn) self.connection = psycopg2.connect(dsn=pg_dsn)
self.cursor = self.connection.cursor() self.cursor = self.connection.cursor()
@ -592,6 +603,9 @@ class LoadDump(object):
self.tr = Transaction.objects.create(meta="initial import") self.tr = Transaction.objects.create(meta="initial import")
self.clock_stack = [] self.clock_stack = []
self.verbosity = verbosity self.verbosity = verbosity
self.dry = dry
# Pre-seed a zero counter for every exclusion rule that is passed to
# ignore_individu() (RG1 to RG6), so that each rule gets an entry in the
# import report even when it never fires. RG5 and RG6 were missing here.
for cause in ('RG1', 'RG2', 'RG3', 'RG4', 'RG5', 'RG6'):
    self.counters['individus.ignores.%s' % cause] = 0
def start(self, msg=None): def start(self, msg=None):
if msg and self.verbosity > 0: if msg and self.verbosity > 0:
@ -613,28 +627,57 @@ class LoadDump(object):
return decorator return decorator
@log()
def load(self):
    """Run every import step, then print the import report when verbose.

    All steps run inside one ``transaction.atomic()`` block. In dry mode a
    ``DryLoad`` exception is raised once the last step has completed, which
    makes the block exit on an exception; it is swallowed immediately
    afterwards. The report is only printed when ``verbosity`` is above 1.
    """
    steps = (
        self.reset_db,
        # individuals go before swarm users: load_swarm() reads
        # self.swarmids, which load_individuals() fills in
        self.load_individuals,
        self.load_swarm,
        self.load_addresses,
        self.load_relations,
    )
    try:
        with transaction.atomic():
            for step in steps:
                step()
            if self.dry:
                raise DryLoad
    except DryLoad:
        # expected end of a simulated run
        pass

    if not self.verbosity > 1:
        return

    # tally counters, one line per counter, sorted by name
    print()
    print('Rapport d\'import :')
    print()
    for name in sorted(self.counters):
        print(' %-80s' % name, ':', '%10d' % self.counters[name])
    print()
    print(' Compteur de séquence SAGA :', self.saga_sequence)
@log('Resetting db... ')
def reset_db(self):
    """Delete all relations, then all entities, unless running dry."""
    if self.dry:
        # no need to clean up when the changes are not going to be kept
        return
    for model in (Relation, Entity):
        model.objects.all().delete()
@log('Loading swarm users... ') @log('Loading swarm users... ')
def load_swarm(self): def load_swarm(self):
user_fixture = UserFixture(self.authentic_fixture_path) user_fixture = UserFixture(self.authentic_fixture_path)
self.cursor.execute("""SELECT id, username, password, firstname, lastname, email, last_login FROM users self.cursor.execute('''SELECT id, username, password, firstname, lastname, email,
WHERE status = 'active'""") last_login, status FROM users''')
self.swarmid_mapping = swarmid_mapping = {}
for (swarmid, username, password, first_name, last_name, email, for (swarmid, username, password, first_name, last_name, email,
last_login) in self.cursor.fetchall(): last_login, status) in self.cursor.fetchall():
uid = user_fixture.write_user( self.counters['swarmids'] += 1
self.counters['swarmids.%s' % status] += 1
if swarmid not in self.swarmids:
self.counters['swarmids.ignores.individu_ignore'] += 1
continue
if status == 'suspended':
# RG14: on ne garde pas les mots de passe des comptes suspendus
password = None
# RG14: on ne garde pas les comptes inactifs
if status == 'inactive':
self.counters['swarmids.ignores.inactive'] += 1
continue
self.counters['swarmids.importes'] += 1
self.counters['swarmids.importes.%s' % status] += 1
user_fixture.write_user(
swarmid=swarmid, swarmid=swarmid,
username=username, username=username,
password=password, password=password,
@ -642,22 +685,40 @@ class LoadDump(object):
last_name=last_name, last_name=last_name,
email=email, email=email,
last_login=last_login) last_login=last_login)
swarmid_mapping[swarmid] = uid
user_fixture.close() user_fixture.close()
def ignore_individu(self, cause, individualid):
    """Count one ignored individual, globally and for the given rule.

    ``cause`` is the rule label used as counter suffix (callers pass
    values such as ``'RG1'``). ``individualid`` is accepted but not used.
    """
    total_key = 'individus.ignores'
    for key in (total_key, '%s.%s' % (total_key, cause)):
        self.counters[key] += 1
@log('Loading individual... ') @log('Loading individual... ')
def load_individuals(self): def load_individuals(self):
self.cursor.execute('''SELECT id, swarmid, gender, firstname, lastname, nameofuse, email, phones::json, sql = '''
legalstatus, birthdate, mappings::json WITH agg AS (SELECT ind.id as id, array_agg(ir1.relationtype) as relt1,
FROM individual''') array_agg(ir2.relationtype) as relt2,
array_agg(users.id) as swarmids
FROM individual as ind
LEFT JOIN individualrelation as ir1 ON ir1.subjectid = ind.id
LEFT JOIN individualrelation as ir2 ON ir2.responsibleid = ind.id
LEFT JOIN users ON users.id = ind.swarmid GROUP BY ind.id)
SELECT DISTINCT ON (ind.id) ind.id, ind.state, ind.swarmid, ind.gender, ind.firstname, ind.lastname,
ind.nameofuse, ind.email, ind.phones::json,
ind.legalstatus, ind.birthdate, ind.mappings::json,
agg.relt1, agg.relt2, agg.swarmids
FROM individual as ind, agg WHERE ind.id = agg.id'''
self.cursor.execute(sql)
individu_batch = [] individu_batch = []
individu_schema = EntitySchema.objects.get(slug='individu') individu_schema = EntitySchema.objects.get(slug='individu')
self.individu_mapping = individu_mapping = {} self.individu_mapping = individu_mapping = {}
self.swarmids = set()
for (individualid, state, swarmid, gender, firstname, lastname, nameofuse, email, phones,
legalstatus, birthdate, mappings, subject_relations, actor_relations, swarmids) in self.cursor.fetchall():
# comptage des individus
self.counters['individus'] += 1
for (individualid, swarmid, gender, firstname, lastname, nameofuse, email, phones,
legalstatus, birthdate, mappings) in self.cursor.fetchall():
if gender == 'Female': if gender == 'Female':
genre = 'femme' genre = 'femme'
elif gender == 'Male': elif gender == 'Male':
@ -676,6 +737,14 @@ class LoadDump(object):
else: else:
raise NotImplementedError('unknown legalstatus: %s' % legalstatus) raise NotImplementedError('unknown legalstatus: %s' % legalstatus)
self.counters['individus.%s' % statut_legal] += 1
self.counters['individus.%s.%s' % (statut_legal, genre)] += 1
if email:
self.counters['emails'] += 1
for key in mappings:
self.counters['cles_de_federation.%s' % key] += 1
telephones = [] telephones = []
for phone in phones: for phone in phones:
if phone['phoneType'] == 'OtherPhone': if phone['phoneType'] == 'OtherPhone':
@ -691,22 +760,85 @@ class LoadDump(object):
telephones.append({'type': kind, 'numero': phone['number']}) telephones.append({'type': kind, 'numero': phone['number']})
mappings = mappings or {} mappings = mappings or {}
if swarmid in self.swarmid_mapping: if swarmid and swarmids:
mappings['authentic'] = self.swarmid_mapping[swarmid] mappings['authentic'] = 'swarmid-%s' % swarmid
# RG8, RG9, RG9bis: carry over the birth name and the name in use.
if lastname and nameofuse:
    # both names are present: keep both
    nom_de_naissance = lastname.upper()
    nom_d_usage = nameofuse.upper()
elif statut_legal == 'mineur' or genre == 'homme':
    # children and men: the name in use becomes the birth name
    nom_de_naissance = nameofuse.upper()
    nom_d_usage = u''
else:
    # women: only the name in use is kept. The birth name has to be
    # assigned here (the previous `==` only compared it), otherwise it is
    # either undefined or left over from the previous individual.
    nom_de_naissance = u''
    nom_d_usage = nameofuse.upper()
content = { content = {
'genre': genre, 'genre': genre,
'prenoms': firstname.upper(), 'prenoms': firstname.upper(),
'nom_de_naissance': lastname.upper() if lastname else '', 'nom_de_naissance': nom_de_naissance,
'nom_d_usage': nameofuse.upper(), 'nom_d_usage': nom_d_usage,
'statut_legal': statut_legal, 'statut_legal': statut_legal,
'date_de_naissance': birthdate.isoformat(), 'date_de_naissance': birthdate.isoformat(),
'cles_de_federation': mappings, 'cles_de_federation': mappings,
} }
if not mappings and not subject_relations and not actor_relations:
# individu ignoré RG1: pas de clés, pas de réseau (pas de relations)
self.ignore_individu('RG1', individualid)
continue
if not mappings:
new_cursor = self.connection.cursor()
new_cursor.execute('''
SELECT DISTINCT(ind.id), ind.mappings::jsonb
FROM individual as ind
INNER JOIN individualrelation as ir ON
(ind.id = ir.subjectid AND ir.responsibleid = %s) OR
(ind.id = ir.responsibleid AND ir.subjectid = %s)''',
(individualid, individualid))
no_mappings = all(not reseau_mappings for reseau_id, reseau_mappings in
new_cursor.fetchall())
if no_mappings:
# individu ignoré RG2: pas de clés, et réseau sans clés
self.ignore_individu('RG2', individualid)
continue
if state == 'invalid':
# individu ignoré RG3: fiche invalide
self.ignore_individu('RG3', individualid)
continue
if state == 'invisible' and not mappings:
# individu ignoré RG4: fiche invisible et aucune clé
self.ignore_individu('RG4', individualid)
continue
if state == 'archived':
# individu ignoré RG5: fiche archivée
self.ignore_individu('RG5', individualid)
continue
if state == 'temp':
# individu ignoré RG6: fiche temporaire
self.ignore_individu('RG6', individualid)
continue
if telephones: if telephones:
content['telephones'] = telephones content['telephones'] = telephones
if email: if email:
content['email'] = email self.counters['emails_importables'] += 1
e = Entity(id=individualid, email = email.strip()
if self.EMAIL_RE.match(email):
content['email'] = email
else: # RG11: on ignore les emails mal formattée
self.counters['emails_importables.ignores.RG11'] += 1
self.counters['individus_importes.%s.%s' % (statut_legal, genre)] += 1
for key in mappings:
self.counters['cles_de_federation.importes'] += 1
self.counters['cles_de_federation.importes.%s' % key] += 1
# enregistre les swarmid à importer
self.swarmids.add(swarmid)
new_id = individualid
if self.dry:
new_id = None
e = Entity(id=new_id,
created=self.tr, created=self.tr,
schema=individu_schema, schema=individu_schema,
content=content) content=content)
@ -717,14 +849,20 @@ class LoadDump(object):
max_tiers_saga = 0 max_tiers_saga = 0
for tiers_saga, individualid in self.cursor.fetchall(): for tiers_saga, individualid in self.cursor.fetchall():
self.counters['cles_de_federation.saga_tiers'] += 1
if individualid not in individu_mapping:
continue
self.counters['cles_de_federation.importes'] += 1
self.counters['cles_de_federation.importes.saga_tiers'] += 1
cles = individu_mapping[individualid].content['cles_de_federation'] cles = individu_mapping[individualid].content['cles_de_federation']
max_tiers_saga = max(max_tiers_saga, int(str(tiers_saga[1:]))) max_tiers_saga = max(max_tiers_saga, int(str(tiers_saga[2:])))
cles['saga_tiers'] = str(tiers_saga) cles['saga_tiers'] = str(tiers_saga)
Entity.objects.bulk_create(individu_batch) Entity.objects.bulk_create(individu_batch)
connection.cursor().execute("SELECT setval(pg_get_serial_sequence('zoo_data_entity', 'id')," connection.cursor().execute("SELECT setval(pg_get_serial_sequence('zoo_data_entity', 'id'),"
" coalesce(max(id),0) + 1, false) FROM zoo_data_entity") " coalesce(max(id),0) + 1, false) FROM zoo_data_entity")
set_saga_sequence(max_tiers_saga + 1) self.saga_sequence = max_tiers_saga + 1
set_saga_sequence(self.saga_sequence)
@log('Loading addresses... ') @log('Loading addresses... ')
def load_addresses(self): def load_addresses(self):
@ -758,11 +896,22 @@ class LoadDump(object):
ia.addressid = a.id''') ia.addressid = a.id''')
for individualid, addressid, is_primary, streetnumber, streetnumberext, streetname, streetmatriculation, \ for individualid, addressid, is_primary, streetnumber, streetnumberext, streetname, streetmatriculation, \
ext1, ext2, at, city, zipcode, country, inseecode in self.cursor.fetchall(): ext1, ext2, at, city, zipcode, country, inseecode in self.cursor.fetchall():
if individualid not in self.individu_mapping: self.counters['relations_adresse'] += 1
continue # RG10 bis: on retire le préfixe chez
if at and at.lower().strip().startswith('chez'):
at = at.strip()[4:].strip()
if addressid not in adresse_mapping: if addressid not in adresse_mapping:
self.counters['adresses'] += 1
if individualid not in self.individu_mapping:
self.counters['relations_adresse.ignores'] += 1
self.counters['relations_adresse.ignores.individu_ignore'] += 1
continue
if streetnumber:
streetnumber = streetnumber.strip()
content = { content = {
'streetnumber': streetnumber, # RG10: initialisation du numéro de rue à zéro si vide
'streetnumber': streetnumber or '0',
'streetnumberext': streetnumberext, 'streetnumberext': streetnumberext,
'streetname': streetname, 'streetname': streetname,
'streetmatriculation': streetmatriculation, 'streetmatriculation': streetmatriculation,
@ -777,8 +926,13 @@ class LoadDump(object):
e = Entity(created=self.tr, schema=adresse_schema, content=content) e = Entity(created=self.tr, schema=adresse_schema, content=content)
adresse_batch.append(e) adresse_batch.append(e)
adresse_mapping[addressid] = e adresse_mapping[addressid] = e
if individualid not in self.individu_mapping:
self.counters['relations_adresse.ignores.individu_ignore'] += 1
continue
self.counters['relations_adresse.importes'] += 1
individu_adresse_mapping[(individualid, addressid)] = is_primary individu_adresse_mapping[(individualid, addressid)] = is_primary
self.counters['adresses.importes'] = len(adresse_mapping)
Entity.objects.bulk_create(adresse_batch) Entity.objects.bulk_create(adresse_batch)
relation_batch = [] relation_batch = []
@ -811,14 +965,12 @@ class LoadDump(object):
relation_batch = [] relation_batch = []
seen = set()
for label, relationtype, responsibleid, subjectid in self.cursor.fetchall(): for label, relationtype, responsibleid, subjectid in self.cursor.fetchall():
self.counters['relations'] += 1
self.counters['relations.%s' % relationtype] += 1
self.counters['relations.%s.%s' % (relationtype, label)] += 1
if relationtype == 'SituationFamiliale': if relationtype == 'SituationFamiliale':
key = (min(responsibleid, subjectid), max(responsibleid, subjectid))
if key in seen:
continue
seen.add(key)
schema = union_schema schema = union_schema
if label == 'Marie': if label == 'Marie':
kind = 'pacs/mariage' kind = 'pacs/mariage'
@ -839,11 +991,24 @@ class LoadDump(object):
elif label == 'RepresentantPersonneMoraleQualifiee': elif label == 'RepresentantPersonneMoraleQualifiee':
kind = 'representant_personne_morale_qualifiee' kind = 'representant_personne_morale_qualifiee'
elif label == 'Tuteur': elif label == 'Tuteur':
kind = 'tuteur' # conversion tutelle en tiers de confiance
self.counters['relations_modifies.RG7bis'] += 1
kind = 'tiers_de_confiance'
elif label == 'Curatelle':
# RG7: the counter key has no placeholder, so it must not be %-formatted
# (doing so raises TypeError as soon as a 'Curatelle' row is met)
self.counters['relations.ignores.RG7'] += 1
continue
else: else:
raise NotImplementedError('unknown label for relationtype: %s, %s' raise NotImplementedError('unknown label for relationtype: %s, %s'
% (label, relationtype)) % (label, relationtype))
content = {'statut': kind} content = {'statut': kind}
if responsibleid not in self.individu_mapping or subjectid not in self.individu_mapping:
self.counters['relations.ignores.individu_ignore'] += 1
continue
if relationtype == 'ResponsabiliteLegale' and label == 'Tuteur':
self.counters['relations_modifies.RG7bis.importes'] += 1
self.counters['relations_importes'] += 1
self.counters['relations_importes.%s' % relationtype] += 1
self.counters['relations_importes.%s.%s' % (relationtype, kind)] += 1
e = Relation(created=self.tr, schema=schema, left=self.individu_mapping[responsibleid], e = Relation(created=self.tr, schema=schema, left=self.individu_mapping[responsibleid],
right=self.individu_mapping[subjectid], content=content) right=self.individu_mapping[subjectid], content=content)
relation_batch.append(e) relation_batch.append(e)