From 3648411eea22419ee7275051a78092bcb9a9ac44 Mon Sep 17 00:00:00 2001 From: Benjamin Dauvergne Date: Wed, 26 Jul 2017 14:12:20 +0200 Subject: [PATCH] =?UTF-8?q?nanterre:=20ajout=20d'un=20mode=20simul=C3=A9?= =?UTF-8?q?=20et=20d'un=20rapport=20=C3=A0=20la=20commande=20d'import?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Makefile | 5 +- .../management/commands/rsu-load-dump.py | 7 +- zoo/zoo_nanterre/utils.py | 243 +++++++++++++++--- 3 files changed, 214 insertions(+), 41 deletions(-) diff --git a/Makefile b/Makefile index 0062f21..67f6288 100644 --- a/Makefile +++ b/Makefile @@ -13,4 +13,7 @@ import: createdb nanterre_rsu psql nanterre_rsu 0: @@ -613,28 +627,57 @@ class LoadDump(object): return decorator @log() - @transaction.atomic def load(self): - self.reset_db() - self.load_swarm() - self.load_individuals() - self.load_addresses() - self.load_relations() + try: + with transaction.atomic(): + self.reset_db() + self.load_individuals() + self.load_swarm() + self.load_addresses() + self.load_relations() + if self.dry: + raise DryLoad + except DryLoad: + pass + # tally counters + if self.verbosity > 1: + print() + print('Rapport d\'import :') + print() + for key in sorted(self.counters): + print(' %-80s' % key, ':', '%10d' % self.counters[key]) + print() + print(' Compteur de séquence SAGA :', self.saga_sequence) @log('Resetting db... ') def reset_db(self): - Relation.objects.all().delete() - Entity.objects.all().delete() + if not self.dry: + # pas besoin de nettoyer si on ne va pas garder les modifications + Relation.objects.all().delete() + Entity.objects.all().delete() @log('Loading swarm users... ') def load_swarm(self): user_fixture = UserFixture(self.authentic_fixture_path) - self.cursor.execute("""SELECT id, username, password, firstname, lastname, email, last_login FROM users - WHERE status = 'active'""") - self.swarmid_mapping = swarmid_mapping = {} + self.cursor.execute('''SELECT id, username, password, firstname, lastname, email, + last_login, status FROM users''') for (swarmid, username, password, first_name, last_name, email, - last_login) in self.cursor.fetchall(): - uid = user_fixture.write_user( + last_login, status) in self.cursor.fetchall(): + self.counters['swarmids'] += 1 + self.counters['swarmids.%s' % status] += 1 + if swarmid not in self.swarmids: + self.counters['swarmids.ignores.individu_ignore'] += 1 + continue + if status == 'suspended': + # RG14: on ne garde pas les mots de passe des comptes suspendus + password = None + # RG14: on ne garde pas les comptes inactifs + if status == 'inactive': + self.counters['swarmids.ignores.inactive'] += 1 + continue + self.counters['swarmids.importes'] += 1 + self.counters['swarmids.importes.%s' % status] += 1 + user_fixture.write_user( swarmid=swarmid, username=username, password=password, @@ -642,22 +685,40 @@ class LoadDump(object): last_name=last_name, email=email, last_login=last_login) - swarmid_mapping[swarmid] = uid user_fixture.close() + def ignore_individu(self, cause, individualid): + self.counters['individus.ignores'] += 1 + self.counters['individus.ignores.%s' % cause] += 1 + @log('Loading individual... ') def load_individuals(self): - self.cursor.execute('''SELECT id, swarmid, gender, firstname, lastname, nameofuse, email, phones::json, - legalstatus, birthdate, mappings::json - FROM individual''') + sql = ''' +WITH agg AS (SELECT ind.id as id, array_agg(ir1.relationtype) as relt1, + array_agg(ir2.relationtype) as relt2, + array_agg(users.id) as swarmids + FROM individual as ind + LEFT JOIN individualrelation as ir1 ON ir1.subjectid = ind.id + LEFT JOIN individualrelation as ir2 ON ir2.responsibleid = ind.id + LEFT JOIN users ON users.id = ind.swarmid GROUP BY ind.id) +SELECT DISTINCT ON (ind.id) ind.id, ind.state, ind.swarmid, ind.gender, ind.firstname, ind.lastname, + ind.nameofuse, ind.email, ind.phones::json, + ind.legalstatus, ind.birthdate, ind.mappings::json, + agg.relt1, agg.relt2, agg.swarmids +FROM individual as ind, agg WHERE ind.id = agg.id''' + self.cursor.execute(sql) individu_batch = [] individu_schema = EntitySchema.objects.get(slug='individu') self.individu_mapping = individu_mapping = {} + self.swarmids = set() + + for (individualid, state, swarmid, gender, firstname, lastname, nameofuse, email, phones, + legalstatus, birthdate, mappings, subject_relations, actor_relations, swarmids) in self.cursor.fetchall(): + # comptage des individus + self.counters['individus'] += 1 - for (individualid, swarmid, gender, firstname, lastname, nameofuse, email, phones, - legalstatus, birthdate, mappings) in self.cursor.fetchall(): if gender == 'Female': genre = 'femme' elif gender == 'Male': @@ -676,6 +737,14 @@ class LoadDump(object): else: raise NotImplementedError('unknown legalstatus: %s' % legalstatus) + self.counters['individus.%s' % statut_legal] += 1 + self.counters['individus.%s.%s' % (statut_legal, genre)] += 1 + if email: + self.counters['emails'] += 1 + + for key in mappings: + self.counters['cles_de_federation.%s' % key] += 1 + telephones = [] for phone in phones: if phone['phoneType'] == 'OtherPhone': @@ -691,22 +760,85 @@ class LoadDump(object): telephones.append({'type': kind, 'numero': phone['number']}) mappings = mappings or {} - if swarmid in self.swarmid_mapping: - mappings['authentic'] = self.swarmid_mapping[swarmid] + if swarmid and swarmids: + mappings['authentic'] = 'swarmid-%s' % swarmid + # RG8, RG9, RG9bis: reprise nom de naissance, nom d'usage + # on a tout on garde + if lastname and nameofuse: + nom_de_naissance = lastname.upper() + nom_d_usage = nameofuse.upper() + # cas des enfants et des hommes + elif statut_legal == 'mineur' or genre == 'homme': + nom_de_naissance = nameofuse.upper() + nom_d_usage = u'' + # cas des femmes + else: + nom_de_naissance == '' + nom_d_usage = nameofuse.upper() content = { 'genre': genre, 'prenoms': firstname.upper(), - 'nom_de_naissance': lastname.upper() if lastname else '', - 'nom_d_usage': nameofuse.upper(), + 'nom_de_naissance': nom_de_naissance, + 'nom_d_usage': nom_d_usage, 'statut_legal': statut_legal, 'date_de_naissance': birthdate.isoformat(), 'cles_de_federation': mappings, } + if not mappings and not subject_relations and not actor_relations: + # individu ignoré RG1: pas de clés, pas de réseau (pas de relations) + self.ignore_individu('RG1', individualid) + continue + if not mappings: + new_cursor = self.connection.cursor() + new_cursor.execute(''' +SELECT DISTINCT(ind.id), ind.mappings::jsonb +FROM individual as ind +INNER JOIN individualrelation as ir ON + (ind.id = ir.subjectid AND ir.responsibleid = %s) OR + (ind.id = ir.responsibleid AND ir.subjectid = %s)''', + (individualid, individualid)) + no_mappings = all(not reseau_mappings for reseau_id, reseau_mappings in + new_cursor.fetchall()) + if no_mappings: + # individu ignoré RG2: pas de clés, et réseau sans clés + self.ignore_individu('RG2', individualid) + continue + if state == 'invalid': + # individu ignoré RG3: ficher invalide + self.ignore_individu('RG3', individualid) + continue + if state == 'invisible' and not mappings: + # individu ignoré RG4: fiche invisible et aucune clé + self.ignore_individu('RG4', individualid) + continue + if state == 'archived': + # individu ignoré RG5: fiche archivée + self.ignore_individu('RG5', individualid) + continue + if state == 'temp': + # individu ignoré RG5: fiche archivée + self.ignore_individu('RG6', individualid) + continue + if telephones: content['telephones'] = telephones if email: - content['email'] = email - e = Entity(id=individualid, + self.counters['emails_importables'] += 1 + email = email.strip() + if self.EMAIL_RE.match(email): + content['email'] = email + else: # RG11: on ignore les emails mal formattée + self.counters['emails_importables.ignores.RG11'] += 1 + self.counters['individus_importes.%s.%s' % (statut_legal, genre)] += 1 + for key in mappings: + self.counters['cles_de_federation.importes'] += 1 + self.counters['cles_de_federation.importes.%s' % key] += 1 + # enregistre les swarmid à importer + self.swarmids.add(swarmid) + new_id = individualid + if self.dry: + new_id = None + e = Entity(id=new_id, created=self.tr, schema=individu_schema, content=content) @@ -717,14 +849,20 @@ class LoadDump(object): max_tiers_saga = 0 for tiers_saga, individualid in self.cursor.fetchall(): + self.counters['cles_de_federation.saga_tiers'] += 1 + if individualid not in individu_mapping: + continue + self.counters['cles_de_federation.importes'] += 1 + self.counters['cles_de_federation.importes.saga_tiers'] += 1 cles = individu_mapping[individualid].content['cles_de_federation'] - max_tiers_saga = max(max_tiers_saga, int(str(tiers_saga[1:]))) + max_tiers_saga = max(max_tiers_saga, int(str(tiers_saga[2:]))) cles['saga_tiers'] = str(tiers_saga) Entity.objects.bulk_create(individu_batch) connection.cursor().execute("SELECT setval(pg_get_serial_sequence('zoo_data_entity', 'id')," " coalesce(max(id),0) + 1, false) FROM zoo_data_entity") - set_saga_sequence(max_tiers_saga + 1) + self.saga_sequence = max_tiers_saga + 1 + set_saga_sequence(self.saga_sequence) @log('Loading addresses... ') def load_addresses(self): @@ -758,11 +896,22 @@ class LoadDump(object): ia.addressid = a.id''') for individualid, addressid, is_primary, streetnumber, streetnumberext, streetname, streetmatriculation, \ ext1, ext2, at, city, zipcode, country, inseecode in self.cursor.fetchall(): - if individualid not in self.individu_mapping: - continue + self.counters['relations_adresse'] += 1 + # RG10 bis: on retire le préfixe chez + if at and at.lower().strip().startswith('chez'): + at = at.strip()[4:].strip() if addressid not in adresse_mapping: + self.counters['adresses'] += 1 + + if individualid not in self.individu_mapping: + self.counters['relations_adresse.ignores'] += 1 + self.counters['relations_adresse.ignores.individu_ignore'] += 1 + continue + if streetnumber: + streetnumber = streetnumber.strip() content = { - 'streetnumber': streetnumber, + # RG10: initialisation du numéro de rue à zéro si vide + 'streetnumber': streetnumber or '0', 'streetnumberext': streetnumberext, 'streetname': streetname, 'streetmatriculation': streetmatriculation, @@ -777,8 +926,13 @@ class LoadDump(object): e = Entity(created=self.tr, schema=adresse_schema, content=content) adresse_batch.append(e) adresse_mapping[addressid] = e + if individualid not in self.individu_mapping: + self.counters['relations_adresse.ignores.individu_ignore'] += 1 + continue + self.counters['relations_adresse.importes'] += 1 individu_adresse_mapping[(individualid, addressid)] = is_primary + self.counters['adresses.importes'] = len(adresse_mapping) Entity.objects.bulk_create(adresse_batch) relation_batch = [] @@ -811,14 +965,12 @@ class LoadDump(object): relation_batch = [] - seen = set() - for label, relationtype, responsibleid, subjectid in self.cursor.fetchall(): + self.counters['relations'] += 1 + self.counters['relations.%s' % relationtype] += 1 + self.counters['relations.%s.%s' % (relationtype, label)] += 1 + if relationtype == 'SituationFamiliale': - key = (min(responsibleid, subjectid), max(responsibleid, subjectid)) - if key in seen: - continue - seen.add(key) schema = union_schema if label == 'Marie': kind = 'pacs/mariage' @@ -839,11 +991,24 @@ class LoadDump(object): elif label == 'RepresentantPersonneMoraleQualifiee': kind = 'representant_personne_morale_qualifiee' elif label == 'Tuteur': - kind = 'tuteur' + # conversion tutelle en tiers de confiance + self.counters['relations_modifies.RG7bis'] += 1 + kind = 'tiers_de_confiance' + elif label == 'Curatelle': + self.counters['relations.ignores.RG7' % relationtype] += 1 + continue else: raise NotImplementedError('unknown label for relationtype: %s, %s' % (label, relationtype)) content = {'statut': kind} + if responsibleid not in self.individu_mapping or subjectid not in self.individu_mapping: + self.counters['relations.ignores.individu_ignore'] += 1 + continue + if relationtype == 'ResponsabiliteLegale' and label == 'Tuteur': + self.counters['relations_modifies.RG7bis.importes'] += 1 + self.counters['relations_importes'] += 1 + self.counters['relations_importes.%s' % relationtype] += 1 + self.counters['relations_importes.%s.%s' % (relationtype, kind)] += 1 e = Relation(created=self.tr, schema=schema, left=self.individu_mapping[responsibleid], right=self.individu_mapping[subjectid], content=content) relation_batch.append(e)