# -*- coding: utf-8 -*-
#
# zoo - versatile objects management
# Copyright (C) 2016 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import print_function
import functools
import uuid
import io
import csv
import sys
import re
import datetime
import isodate
import operator
import copy
import collections
import json
import time
from dateutil.relativedelta import relativedelta
import psycopg2
from django.conf import settings
from django.contrib.postgres.search import TrigramDistance
from django.db import connection
from django.db.models import Q, F, Value, ExpressionWrapper, CharField, When, Case
from django.db.models.functions import Least, Greatest, Coalesce, Concat
from django.db import transaction
from django.contrib.auth.hashers import make_password
from django.http import HttpResponse
from django.utils import six
from django.utils.timezone import now, make_aware
from django.utils.encoding import force_bytes
from zoo.zoo_meta.models import EntitySchema, RelationSchema
from zoo.zoo_data.models import Entity, Relation, Transaction, Log
from zoo.zoo_data.search import JSONTextRef, Normalize, TextCat
today = datetime.date.today
INDIVIDU_ENT = 'individu'
ADRESSE_ENT = 'adresse'
HABITE_REL = 'habite'
UNION_REL = 'union'
RESPONSABILITE_LEGALE_REL = 'responsabilite-legale'
def make_date(date_var):
'''Extract a date from a datetime, a date, a struct_time or a string'''
if isinstance(date_var, datetime.datetime):
return date_var.date()
if isinstance(date_var, datetime.date):
return date_var
return isodate.parse_date(date_var)
def date_delta(t1, t2):
'''Return the timedelta between two date like values'''
t1, t2 = make_date(t1), make_date(t2)
return t1 - t2
def age_in_years_and_months(born, today=None):
    '''Compute the age at `today` as the number of years and months elapsed since `born`'''
born = make_date(born)
if today is None:
today = datetime.date.today()
today = make_date(today)
before = (today.month, today.day) < (born.month, born.day)
years = today.year - born.year
months = today.month - born.month
if before:
years -= 1
months += 12
if today.day < born.day:
months -= 1
return years, months
def age_in_years(born, today=None):
    '''Compute the age at `today` as the number of full years elapsed since `born`'''
return age_in_years_and_months(born, today=today)[0]
def conjoint(individu):
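    '''Return the partner of `individu` through the union relation as an
    (entity, relation) pair, or (None, None) when there is no union.'''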
for relation in individu.left_relations.all():
if relation.schema.slug != UNION_REL:
continue
return relation.right, relation
for relation in individu.right_relations.all():
if relation.schema.slug != UNION_REL:
continue
return relation.left, relation
return None, None
def enfants(individu):
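    '''Yield (child, relation) pairs for each responsabilite-legale relation
    where `individu` is the legally responsible party.'''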
for relation in individu.left_relations.all():
if relation.schema.slug != RESPONSABILITE_LEGALE_REL:
continue
yield relation.right, relation
def enfants_couple(relation):
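    '''Return the set of children of either member of a union relation.'''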
enfants_couple = set()
for enfant, rel in enfants(relation.left):
enfants_couple.add(enfant)
for enfant, rel in enfants(relation.right):
enfants_couple.add(enfant)
return enfants_couple
def parents(individu):
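    '''Yield (parent, relation) pairs for each responsabilite-legale relation
    where `individu` is the child.'''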
for relation in individu.right_relations.all():
if relation.schema.slug != RESPONSABILITE_LEGALE_REL:
continue
yield relation.left, relation
def adresses(individu):
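    '''Return the list of (adresse, habite relation) pairs of an individual,
    main address first.'''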
adresses = []
for relation in individu.left_relations.all():
if relation.schema.slug != HABITE_REL:
continue
adresses.append((relation.right, relation))
adresses.sort(key=lambda t: (not t[1].content.get('principale', False), t[0].id))
return adresses
def fratrie(individu):
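    '''Return the set of children of the parents of a minor (the minor itself
    is included).'''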
assert individu.content['statut_legal'] == 'mineur'
def helper_fratrie():
for parent, relp in parents(individu):
for enfant, rele in enfants(parent):
yield enfant
return set(helper_fratrie())
def adresses_norel(individu):
return [adresse for adresse, rel in adresses(individu)]
def adresse(individu):
    '''Return the address of an adult individual (they must have exactly one)'''
l = list(adresses(individu))
if len(l) != 1:
return None
return l[0][0]
class PersonSearch(object):
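    '''Build fuzzy (trigram based) searches over individu entities.

    The search_* methods return modified copies, so calls can be chained;
    slicing or iterating the searcher runs the query and yields entities
    decorated with age, addresses, federations and family links.

    Illustrative use (query and threshold are only an example)::

        results = list(PersonSearch(limit=0.5).search_query(u'dupont 01/02/1980')[0:10])
    '''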
EMAIL_RE = re.compile(
'^[a-zA-Z0-9.+_-]*@[a-zA-Z0-9.+_-]*$')
    DATE_RE1 = re.compile(
        r'^(?:(?P<year>\d\d|\d\d\d\d)(?:-(?P<month>\d{1,2})(?:-(?P<day>\d{1,2}))?)?)$')
    DATE_RE2 = re.compile(
        r'^(?:(?:(?:(?P<day>\d{1,2})/)?(?P<month>\d{1,2})/)?(?P<year>\d\d|\d\d\d\d))$')
@classmethod
def match_birthdate(cls, birthdate):
return cls.DATE_RE1.match(birthdate) or cls.DATE_RE2.match(birthdate)
@classmethod
def lu(cls, x):
return Normalize(x)
@classmethod
def luv(cls, x):
return cls.lu(Value(x))
@classmethod
def applications(cls):
applications = getattr(settings, 'ZOO_NANTERRE_APPLICATIONS', {})
def helper():
for application, value in applications.items():
yield application, value['name']
return list(helper())
def __init__(self, limit=0.5, base_limit=0.1):
self.birthdates_filters = []
self.filters = {}
self.name_filters = []
self.name_similarities = []
self.email_similarities = []
self.schema = EntitySchema.objects.get(slug=INDIVIDU_ENT)
self.limit = limit
self.base_limit = base_limit
self.annotations = []
def add_filter(self, name, filter_expression):
self.filters.setdefault(name, []).append(filter_expression)
def search_statut_legal(self, statut_legal):
self.add_filter('statut_legal', Q(content__statut_legal=statut_legal))
def search_query(self, query):
'''Take a one line query and try to build a search filter from it'''
emails = []
identifiers = []
birthdates = []
names = []
parts = query.strip().split()
for part in parts:
part = part.strip()
if not part:
continue
if self.EMAIL_RE.match(part):
emails.append(part)
elif part.startswith('#'):
if part[1:]:
identifiers.append(part[1:])
elif self.match_birthdate(part):
birthdates.append(self.match_birthdate(part).groupdict())
else:
names.append(part)
for email in emails:
self = self.search_email(email)
for identifier in identifiers:
self = self.search_identifier(identifier)
for birthdate in birthdates:
if emails or identifiers or names:
self = self.search_birthdate(birthdate, window_days=0)
else:
self = self.search_birthdate(birthdate)
self = self.search_names(names)
return self
def search_email(self, email):
self = copy.deepcopy(self)
self.add_filter('email', self.q_normalize('email', email))
self.email_similarities.append(Value(1.0) - self.distance('email', email))
return self
def search_identifier(self, identifier, key=None):
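        '''Search by identifier: either the value of the federation key `key`
        ('*' matches any individual having that key), or, when no key is
        given, the numeric entity id or any application federation key.'''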
self = copy.deepcopy(self)
if key:
if identifier == '*':
q = Q(**{'content__cles_de_federation__%s__isnull' % key: False})
else:
q = Q(**{'content__cles_de_federation__%s' % key: identifier})
else:
filters = []
try:
individu_id = int(identifier)
except ValueError:
pass
else:
filters.append(Q(id=individu_id))
for key, name in self.applications():
filters.append(Q(**{'content__cles_de_federation__%s' % key: identifier}))
q = functools.reduce(Q.__or__, filters)
self.add_filter('key', q)
return self
def search_birthdate(self, birthdate, window_days=None):
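        '''Search around a birthdate given as a date, a datetime, a matched
        groupdict or a string (YYYY[-MM[-DD]] or [[DD/]MM/]YY[YY]).

        `window_days` widens the interval by that many days; when it is not
        None the filter also matches dates before 1903-01-01 and, for exact
        dates, the first of January of the same year (dubious imports).'''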
self = copy.deepcopy(self)
if hasattr(birthdate, 'year'):
if hasattr(birthdate, 'date'):
birthdate = birthdate.date()
# fast path for date / datetime
before = birthdate
after = birthdate
else:
# case of string
if not hasattr(birthdate, 'keys'):
birthdate = self.match_birthdate(birthdate).groupdict()
this_year = datetime.date.today().year % 100
year = int(birthdate['year'])
if year < 100:
if year > this_year:
year += 1900
else:
year += 2000
birthdate['year'] = str(year)
if birthdate['day']:
before = after = datetime.date(
int(birthdate['year']), int(birthdate['month']), int(birthdate['day']))
elif birthdate['month']:
after = datetime.date(int(birthdate['year']), int(birthdate['month']), 1)
before = ((after + datetime.timedelta(days=31)).replace(day=1)
- datetime.timedelta(days=1))
else:
after = datetime.date(int(birthdate['year']), 1, 1)
before = datetime.date(int(birthdate['year']), 12, 31)
first_january_same_year = None
if before == after and window_days is not None:
first_january_same_year = before.replace(month=1, day=1)
if window_days:
after -= datetime.timedelta(days=window_days)
before += datetime.timedelta(days=window_days)
query = Q(content__date_de_naissance__timestamp__gte=after)
query &= Q(content__date_de_naissance__timestamp__lte=before)
        # window_days is only used by the duplicate search, and only when the
        # birthdate is not the only criterion; we use this fact to augment the
        # search window with dates before 1903 and with the first day of the
        # same year, to match dubious imports.
if window_days is not None:
# always look for entities before 1903-01-01
query |= Q(content__date_de_naissance__timestamp__lte=datetime.date(1903, 1, 1))
# if search is exact look for first january of the same year
if first_january_same_year:
query |= Q(content__date_de_naissance__timestamp=first_january_same_year)
self.add_filter('birthdate', query)
return self
@classmethod
def distance(cls, field, value):
return TrigramDistance(cls.lu(JSONTextRef(F('content'), field)), cls.luv(value))
@classmethod
    def q_normalize(cls, field, value):
        return Q(**{'content__%s__normalize__trigram_similar' % field: cls.luv(value)})
def search_name(self, fullname, factor=1.0, first_name_weight=1.0, last_name_weight=1.0):
self = copy.deepcopy(self)
fullname_naissance = ExpressionWrapper(
Normalize(TextCat(
JSONTextRef(F('content'), 'prenoms'),
Value(' '),
JSONTextRef(F('content'), 'nom_de_naissance'))),
output_field=CharField())
fullname_usage = ExpressionWrapper(
Normalize(TextCat(
JSONTextRef(F('content'), 'prenoms'),
Value(' '),
JSONTextRef(F('content'), 'nom_d_usage'))),
output_field=CharField())
self.annotations.append(('fullname_naissance', fullname_naissance))
self.annotations.append(('fullname_usage', fullname_usage))
# Create the simple filter
q = (Q(fullname_naissance__trigram_similar=self.luv(fullname))
| Q(fullname_usage__trigram_similar=self.luv(fullname)))
self.add_filter('name', q)
# Compute similarity score
self.name_similarities.append(
Value(1.0) - Case(
When(content__nom_de_naissance='',
then=Value(1.0)),
default=TrigramDistance(fullname_naissance, self.luv(fullname))))
self.name_similarities.append(
Value(1.0) - Case(
When(content__nom_d_usage='',
then=Value(1.0)),
default=TrigramDistance(fullname_usage, self.luv(fullname))))
return self
def search_names(self, names):
if not names:
return self
fullname = u' '.join(names)
self = copy.deepcopy(self)
return self.search_name(fullname)
def search_individu(self, individu, factor=1.0, first_name_weight=1.0, last_name_weight=1.0):
self = copy.deepcopy(self)
prenoms = individu.content['prenoms']
nom_de_naissance = individu.content['nom_de_naissance']
nom_d_usage = individu.content['nom_d_usage']
# Create the simple filter
prenoms_expression = Value(1.0) - TrigramDistance(Normalize(
JSONTextRef(F('content'), 'prenoms')), self.luv(prenoms))
nn_nn_expression = Value(1.0) - TrigramDistance(Normalize(
JSONTextRef(F('content'), 'nom_de_naissance')), self.luv(nom_de_naissance))
nu_nu_expression = Value(1.0) - TrigramDistance(Normalize(
JSONTextRef(F('content'), 'nom_d_usage')), self.luv(nom_d_usage))
nn_nu_expression = Value(1.0) - TrigramDistance(Normalize(
JSONTextRef(F('content'), 'nom_de_naissance')), self.luv(nom_d_usage))
nu_nn_expression = Value(1.0) - TrigramDistance(Normalize(
JSONTextRef(F('content'), 'nom_d_usage')), self.luv(nom_de_naissance))
q_noms = []
if nom_de_naissance:
q_noms.append(self.q_normalize('nom_de_naissance', nom_de_naissance))
self.name_similarities.append(
(first_name_weight * prenoms_expression
+ last_name_weight * nn_nn_expression)
/ Value(first_name_weight + last_name_weight))
self.name_similarities.append(
(first_name_weight * prenoms_expression
+ last_name_weight * nu_nn_expression)
/ Value(first_name_weight + last_name_weight))
q_noms.append(self.q_normalize('nom_d_usage', nom_de_naissance))
if nom_d_usage:
q_noms.append(self.q_normalize('nom_d_usage', nom_d_usage))
q_noms.append(self.q_normalize('nom_de_naissance', nom_d_usage))
self.name_similarities.append(
(first_name_weight * prenoms_expression
+ last_name_weight * nu_nu_expression)
/ Value(first_name_weight + last_name_weight))
self.name_similarities.append(
(first_name_weight * prenoms_expression
+ last_name_weight * nn_nu_expression)
/ Value(first_name_weight + last_name_weight))
q = self.q_normalize('prenoms', prenoms) & functools.reduce(operator.__or__, q_noms)
self.add_filter('name', q)
# Compute similarity score
return self
def copy(self):
return copy.deepcopy(self)
@classmethod
    def or_filters(cls, filters):
return functools.reduce(operator.__or__, filters, Q())
@classmethod
def add_age(cls, individu):
individu.age = years, months = age_in_years_and_months(
individu.content['date_de_naissance'])
birthdate = make_date(individu.content['date_de_naissance'])
if birthdate > datetime.date.today():
age_label = u'à naître'
else:
if (months, years) == (0, 0):
age_label = u'moins d\'un mois'
elif years < 1:
age_label = u'%s mois' % months
elif years < 2:
age_label = u'%s mois' % (months + 12)
else:
age_label = u'%s ans' % years
individu.age_label = age_label
@classmethod
def add_adresses(cls, individu):
individu.adresses = []
for adresse, relation in adresses(individu):
a = adresse.content.copy()
a.update(relation.content)
a['id'] = adresse.id
individu.adresses.append(a)
@classmethod
def add_enfants(cls, individu):
enfants_l = []
for enfant, relation in enfants(individu):
cls.add_age(enfant)
cls.add_federations(enfant)
cls.add_adresses(enfant)
enfant.responsabilite_legale = relation.content['statut']
enfants_l.append(enfant)
if enfants_l:
individu.enfants = enfants_l
@classmethod
def add_parents(cls, individu):
parents = []
for relation in individu.right_relations.all():
if relation.schema.slug != 'responsabilite-legale':
continue
parent = relation.left
cls.add_age(parent)
cls.add_federations(parent)
parent.responsabilite_legale = relation.content['statut']
parents.append(parent)
if parents:
individu.parents = parents
@classmethod
def add_union(cls, individu):
con, conjoint_rel = conjoint(individu)
if con:
cls.add_age(con)
cls.add_federations(con)
individu.union = con
individu.union_statut = conjoint_rel.content['statut']
@classmethod
def add_federations(cls, individu):
individu.federations = []
cles_de_federation = individu.content.get('cles_de_federation', {})
for federation_key, federation_name in cls.applications():
if cles_de_federation.get(federation_key):
individu.federations.append(federation_name)
def queryset(self, prefetch=True):
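        '''Build the queryset: set the pg_trgm similarity threshold, apply the
        accumulated annotations and filters, compute a global similarity score
        and order by decreasing similarity then by full name.'''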
        # set the pg_trgm similarity threshold used by the __trigram_similar lookups
        connection.cursor().execute('SELECT set_limit(%s)', [self.base_limit])
qs = Entity.objects.filter(schema=self.schema)
for key, annotation in self.annotations:
qs = qs.annotate(**{key: annotation})
# search filter upon name, id, key, email, birthdate and statut legal
for key in self.filters:
qs = qs.filter(
self.or_filters(
self.filters[key]))
qs = qs.annotate(
fullname2=Concat(
Coalesce(
JSONTextRef(F('content'), 'nom_d_usage'),
JSONTextRef(F('content'), 'nom_de_naissance'),
Value(' ')
),
Value(' '),
JSONTextRef(F('content'), 'prenoms'))
)
# order by similarities or fullname
similarities = []
if self.name_similarities:
e = (Greatest(*self.name_similarities) if len(self.name_similarities) > 1
else self.name_similarities[0])
similarities.append(e)
if self.email_similarities:
e = (Greatest(*self.email_similarities) if len(self.email_similarities) > 1
else self.email_similarities[0])
similarities.append(e)
if similarities:
qs = qs.annotate(similarity=functools.reduce(operator.__add__, similarities) /
Value(len(similarities)))
qs = qs.filter(similarity__gte=self.limit)
qs = qs.order_by('-similarity', 'fullname2')
else:
qs = qs.order_by('fullname2')
if prefetch:
qs = qs.prefetch_related(
'left_relations__schema', 'left_relations__right',
'right_relations__schema', 'right_relations__left',
)
return qs
def __getitem__(self, item):
if hasattr(item, 'start'):
return self.decorate_iter(self.queryset()[item.start:item.stop])
return self.decorate_individu(self.queryset()[item])
@classmethod
    def decorate_individu(cls, individu):
        cls.add_age(individu)
        cls.add_adresses(individu)
        cls.add_federations(individu)
        cls.add_enfants(individu)
        cls.add_parents(individu)
        cls.add_union(individu)
@classmethod
    def decorate_iter(cls, qs):
        for individu in qs:
            cls.decorate_individu(individu)
            yield individu
def __iter__(self):
return self.decorate_iter(self.queryset())
def integrity_check():
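    '''Yield human readable messages (in French) describing integrity
    problems: multiple unions, children with more than two parents, and
    individuals with too many addresses or none at all.'''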
count_union_rels = collections.Counter()
count_parent_rels = collections.Counter()
count_habite_rels = collections.Counter()
rels = Relation.objects.select_related()
for rel in rels.filter(schema__slug=HABITE_REL):
count_habite_rels[rel.left] += 1
for rel in rels.filter(schema__slug=UNION_REL):
key = tuple(sorted([rel.left, rel.right], key=lambda x: x.id))
count_union_rels[key] += 1
count_union_rels[rel.left] += 1
count_union_rels[rel.right] += 1
for rel in rels.filter(schema__slug=RESPONSABILITE_LEGALE_REL):
count_parent_rels[rel.right] += 1
for key, value in count_union_rels.items():
if value > 1:
if isinstance(key, tuple):
yield ("le couple %s / %s est en union plusieurs fois: %s"
% (key[0], key[1], value))
else:
yield ("l'individu %s est en union avec plus d'une personne: %s"
% (key, value))
for key, value in count_parent_rels.items():
if value > 2:
yield ("l'enfant %s a plus de deux parents: %s"
% (key, value))
for key, value in count_habite_rels.items():
if key.content['statut_legal'] == 'majeur' and value > 1:
yield ("l'adulte %s a plus d'une adresse: %s" % (key, value))
if key.content['statut_legal'] == 'mineur' and value > 2:
yield ("l'enfant %s a plus de deux adresses: %s" % (key, value))
id_having_address = [x[0].id for x in count_habite_rels.items()]
for key in Entity.objects.filter(schema__slug=INDIVIDU_ENT):
if key.id not in id_having_address:
if key.content['statut_legal'] == 'majeur':
yield ("l'adulte %s n'a pas d'adresse" % key)
if key.content['statut_legal'] == 'mineur':
yield ("l'enfant %s n'a pas d'adresse" % key)
def upper_dict(d):
'''Transform all string values in d to uppercase'''
for key, value in d.items():
if isinstance(value, six.text_type):
d[key] = value.upper()
class UserFixture(object):
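    '''Incrementally write a JSON fixture of custom_user.user objects to the
    given path; write_user() returns the generated uuid.'''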
def __init__(self, path):
self.stream = open(path, 'w')
self.stream.write('[\n')
self.first = True
def write_user(self, swarmid, username, password, first_name, last_name, email, last_login):
uid = 'swarmid-%s' % swarmid
date_joined = now().isoformat()
if password:
password = 'bcrypt$' + password
else:
password = make_password(uuid.uuid4().hex)
d = {
'model': 'custom_user.user',
'fields': {
'email': email,
'first_name': first_name,
'last_name': last_name,
'last_login': make_aware(last_login).isoformat(),
'username': username,
'uuid': uid,
'password': password,
'modified': date_joined,
'date_joined': date_joined,
'ou': 1,
}
}
if not self.first:
self.stream.write(',\n')
self.stream.write(json.dumps(d))
self.first = False
return uid
def close(self):
self.stream.write(']')
self.stream.flush()
self.stream.close()
psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY)
class DryLoad(Exception):
pass
class LoadDump(object):
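    '''Import a legacy PostgreSQL dump (individuals, users, addresses and
    relations) into zoo entities and relations inside a single transaction,
    and write an Authentic user fixture; with dry=True everything is rolled
    back and only the counters are reported.'''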
EMAIL_RE = re.compile(r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)")
def __init__(self, pg_dsn, authentic_fixture_path, verbosity=1, dry=False):
self.authentic_fixture_path = authentic_fixture_path
self.connection = psycopg2.connect(dsn=pg_dsn)
self.cursor = self.connection.cursor()
self.counters = collections.Counter()
self.tr = Transaction.objects.create(meta="initial import")
self.clock_stack = []
self.verbosity = verbosity
self.dry = dry
for cause in ['RG1', 'RG2', 'RG4', 'RG3']:
self.counters['individus.ignores.%s' % cause] = 0
self.ignored = {}
self.swarmids = set()
def start(self, msg=None):
if msg and self.verbosity > 0:
print(msg, end=' ')
sys.stdout.flush()
self.clock_stack.append(time.time())
def end(self):
if self.verbosity > 0:
print(' DONE (%s s)' % (time.time() - self.clock_stack.pop()))
def log(msg=None):
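        '''Decorator printing `msg` and timing the wrapped loading step.'''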
def decorator(func):
def wrapper(self):
self.start(msg)
func(self)
self.end()
return wrapper
return decorator
@log()
def load(self):
try:
with transaction.atomic():
self.reset_db()
self.load_individuals()
self.load_swarm()
self.load_addresses()
self.load_relations()
if self.dry:
raise DryLoad
except DryLoad:
pass
# tally counters
if self.verbosity > 1:
print()
print('Rapport d\'import :')
print()
for key in sorted(self.counters):
print(' %-80s' % key, ':', '%10d' % self.counters[key])
print()
print(' Prochaine valeur du compteur de séquence SAGA :',
'RG%013d' % self.saga_sequence)
print()
if self.verbosity > 2:
print(' Individus ignorés:')
tally = {}
for iid, causes in self.ignored.items():
tally.setdefault(','.join(sorted(causes)), []).append(iid)
for key in sorted(tally):
print(' ', key, ':', ', '.join(map(str, tally[key])))
print()
@log('Resetting db... ')
def reset_db(self):
if not self.dry:
            # no need to clean up if we are not going to keep the changes
Relation.objects.all().delete()
Entity.objects.all().delete()
@log('Loading swarm users... ')
def load_swarm(self):
user_fixture = UserFixture(self.authentic_fixture_path)
self.cursor.execute('''SELECT id, username, password, firstname, lastname, email,
last_login, status FROM users''')
self.counters['swarmids.lies_a_fiche'] = len(self.swarmids)
for (swarmid, username, password, first_name, last_name, email,
last_login, status) in self.cursor.fetchall():
self.counters['swarmids'] += 1
self.counters['swarmids.%s' % status] += 1
if swarmid not in self.swarmids:
self.counters['swarmids.ignores.individu_ignore'] += 1
continue
if status == 'suspended':
                # RG14: do not keep the passwords of suspended accounts
self.counters['swarmids.RG14'] += 1
password = None
            # RG14: do not keep inactive accounts
if status == 'inactive':
if swarmid in self.swarmids:
self.counters['swarmids.ignores.inactive.lies_a_fiche'] += 1
self.counters['swarmids.ignores.inactive'] += 1
continue
self.counters['swarmids.importes'] += 1
if swarmid in self.swarmids:
self.counters['swarmids.importes.lies_a_fiche'] += 1
self.counters['swarmids.%s.importes.lies_a_fiche' % status] += 1
self.counters['swarmids.%s.importes' % status] += 1
user_fixture.write_user(
swarmid=swarmid,
username=username,
password=password,
first_name=first_name,
last_name=last_name,
email=email,
last_login=last_login)
user_fixture.close()
def ignore_individu(self, cause, individualid):
self.counters['individus.ignores.%s' % cause] += 1
self.ignored.setdefault(individualid, set()).add(cause)
@log('Loading individual... ')
def load_individuals(self):
a2_uuids = set()
        # individuals without mappings whose whole network also has no mapping
rg2_sql = '''
WITH u AS (WITH RECURSIVE network(id1, id2) AS (
SELECT i.id, i.id FROM individual AS i WHERE mappings = '{}'
UNION (WITH aux(id1, id2) AS (SELECT id1, id2 FROM network)
SELECT n.id1::integer, ir.subjectid::integer FROM aux AS n INNER JOIN individualrelation AS ir ON ir.responsibleid = n.id2
UNION ALL
SELECT n.id1::integer, ir.responsibleid::integer FROM aux AS n INNER JOIN individualrelation AS ir ON ir.subjectid = n.id2)
)
SELECT id1, array_agg(id2), bool_and(i.mappings = '{}') AS rg2 FROM network AS n JOIN individual AS i ON i.id = n.id2 GROUP BY id1 ORDER BY id1) SELECT id1 FROM u WHERE rg2;'''
self.cursor.execute(rg2_sql)
rg2 = set(t[0] for t in self.cursor.fetchall())
sql = '''
WITH agg AS (SELECT ind.id as id, array_agg(ir1.relationtype) filter (where ir1.relationtype is not null) as relt1,
array_agg(ir2.relationtype) filter (where ir2.relationtype is not null) as relt2,
array_agg(users.id) filter (where users.id is not null) as swarmids
FROM individual as ind
LEFT JOIN individualrelation as ir1 ON ir1.subjectid = ind.id
LEFT JOIN individualrelation as ir2 ON ir2.responsibleid = ind.id
LEFT JOIN users ON users.id = ind.swarmid AND users.status != 'inactive' GROUP BY ind.id)
SELECT DISTINCT ON (ind.id) ind.id, ind.state, ind.swarmid, ind.gender, ind.firstname, ind.lastname,
ind.nameofuse, ind.email, ind.phones::json,
ind.legalstatus, ind.birthdate, ind.mappings::json,
agg.relt1, agg.relt2, agg.swarmids
FROM individual as ind, agg WHERE ind.id = agg.id'''
self.cursor.execute(sql)
individu_batch = []
individu_schema = EntitySchema.objects.get(slug='individu')
self.individu_mapping = individu_mapping = {}
for (individualid, state, swarmid, gender, firstname, lastname, nameofuse, email, phones,
legalstatus, birthdate, mappings, subject_relations, actor_relations,
swarmids) in self.cursor.fetchall():
            # count individuals
self.counters['individus'] += 1
if gender == 'Female':
genre = 'femme'
elif gender == 'Male':
genre = 'homme'
elif gender == 'Other':
genre = 'autre'
else:
raise NotImplementedError('unknown gender: %s' % gender)
if legalstatus == 'Adulte':
statut_legal = 'majeur'
elif legalstatus == 'Enfant':
statut_legal = 'mineur'
elif legalstatus == 'Emancipe':
statut_legal = 'emancipe'
else:
raise NotImplementedError('unknown legalstatus: %s' % legalstatus)
self.counters['individus.%s' % statut_legal] += 1
self.counters['individus.%s.%s' % (statut_legal, genre)] += 1
if email:
self.counters['emails'] += 1
for key in mappings:
self.counters['cles_de_federation'] += 1
self.counters['cles_de_federation.%s' % key] += 1
telephones = []
for phone in phones:
if phone['phoneType'] == 'OtherPhone':
kind = 'autre'
elif phone['phoneType'] == 'Pro':
kind = 'pro'
elif phone['phoneType'] == 'Mobile':
kind = 'mobile'
elif phone['phoneType'] == 'Home':
kind = 'autre'
else:
raise NotImplementedError('unknown phoneType: %s' % phone['phoneType'])
telephones.append({'type': kind, 'numero': phone['number']})
ignore = False
if not mappings and not subject_relations and not actor_relations:
                # individual ignored, RG1: no federation keys and no network (no relations)
self.ignore_individu('RG1', individualid)
ignore = True
if not mappings and individualid in rg2:
                # individual ignored, RG2: no federation keys and no keys anywhere in its network
self.ignore_individu('RG2', individualid)
ignore = True
mappings = mappings or {}
if state == 'invalid':
                # individual ignored, RG3: invalid record
self.ignore_individu('RG3', individualid)
ignore = True
if state == 'invisible' and not mappings:
                # individual ignored, RG4: invisible record and no federation key
self.ignore_individu('RG4', individualid)
ignore = True
if state == 'archived':
                # individual ignored, RG5: archived record
self.ignore_individu('RG5', individualid)
ignore = True
if state == 'temp':
                # individual ignored, RG6: temporary record
self.ignore_individu('RG6', individualid)
ignore = True
if state == 'pending':
                # individual ignored, RG6bis: pending record
self.ignore_individu('RG6bis', individualid)
ignore = True
if swarmid:
self.counters['individus.swarmid'] += 1
if ignore:
self.counters['individus.ignores'] += 1
if swarmid:
self.counters['individus.swarmid.ignores.RGx'] += 1
continue
if swarmid:
if swarmids:
self.swarmids.add(swarmid)
mappings['authentic'] = 'swarmid-%s' % swarmid
a2_uuids.add(mappings['authentic'])
else:
self.counters['zindividus.swarmid.ignores.swarmid_inconnu'] += 1
            # RG8, RG9, RG9bis: carry over the birth name and the name in use
            # we have both, keep both
if lastname and nameofuse:
nom_de_naissance = lastname.upper()
nom_d_usage = nameofuse.upper()
            # case of children and men
elif statut_legal == 'mineur' or genre == 'homme':
nom_de_naissance = nameofuse.upper()
nom_d_usage = u''
            # case of women
else:
nom_de_naissance = ''
nom_d_usage = nameofuse.upper()
content = {
'genre': genre,
'prenoms': firstname.upper(),
'nom_de_naissance': nom_de_naissance,
'nom_d_usage': nom_d_usage,
'statut_legal': statut_legal,
'date_de_naissance': birthdate.isoformat(),
'cles_de_federation': mappings,
}
if telephones:
content['telephones'] = telephones
if email:
self.counters['emails_importables'] += 1
email = email.strip()
if self.EMAIL_RE.match(email):
content['email'] = email
                else:  # RG11: ignore badly formatted emails
self.counters['emails_importables.ignores.RG11'] += 1
self.counters['individus_importes'] += 1
self.counters['individus_importes.%s' % statut_legal] += 1
self.counters['individus_importes.%s.%s' % (statut_legal, genre)] += 1
for key in mappings:
self.counters['cles_de_federation.importes'] += 1
self.counters['cles_de_federation.importes.%s' % key] += 1
new_id = individualid
if self.dry:
new_id = None
e = Entity(id=new_id,
created=self.tr,
schema=individu_schema,
content=content)
individu_mapping[individualid] = e
individu_batch.append(e)
self.cursor.execute('''SELECT id, individ FROM onlinepayment''')
assert len(self.swarmids) == len(a2_uuids), '%s != %s' % (len(self.swarmids),
len(a2_uuids))
max_tiers_saga = 0
for tiers_saga, individualid in self.cursor.fetchall():
self.counters['cles_de_federation.saga_tiers'] += 1
if individualid not in individu_mapping:
self.counters['cles_de_federation.saga_tiers.ignores_individu_ignore'] += 1
continue
self.counters['cles_de_federation.importes'] += 1
self.counters['cles_de_federation.importes.saga_tiers'] += 1
cles = individu_mapping[individualid].content['cles_de_federation']
max_tiers_saga = max(max_tiers_saga, int(str(tiers_saga[2:])))
cles['saga_tiers'] = str(tiers_saga)
Entity.objects.bulk_create(individu_batch)
connection.cursor().execute("SELECT setval(pg_get_serial_sequence('zoo_data_entity', 'id'),"
" coalesce(max(id),0) + 1, false) FROM zoo_data_entity")
self.saga_sequence = max_tiers_saga + 1
set_saga_sequence(self.saga_sequence)
@log('Loading addresses... ')
def load_addresses(self):
adresse_schema = EntitySchema.objects.get(slug='adresse')
habite_schema = RelationSchema.objects.get(slug=HABITE_REL)
adresse_batch = []
adresse_mapping = {}
adresse_seen = set()
self.individu_adresse_mapping = individu_adresse_mapping = {}
self.cursor.execute('SELECT count(*) FROM address')
self.counters['adresses'] = self.cursor.fetchone()[0]
self.cursor.execute('SELECT count(a.id) FROM address as a LEFT OUTER JOIN '
'individualaddress ia ON ia.addressid = a.id WHERE ia.individualid '
'IS NULL')
self.counters['adresses.ignores.sans_individu'] = self.cursor.fetchone()[0]
        # create the address entities
self.cursor.execute('''
SELECT
ia.individualid,
a.id,
ia.isprimary,
a.streetnumber,
a.streetnumberext,
a.streetname,
a.streetmatriculation,
a.ext1,
a.ext2,
a.at,
a.city,
a.zipcode,
a.country,
a.inseecode
FROM individualaddress as ia, address as a
WHERE
ia.addressid = a.id''')
for individualid, addressid, is_primary, streetnumber, streetnumberext, streetname, streetmatriculation, \
ext1, ext2, at, city, zipcode, country, inseecode in self.cursor.fetchall():
self.counters['relations_adresse'] += 1
if is_primary:
self.counters['relations_adresse.principale'] += 1
if addressid not in adresse_mapping:
adresse_seen.add(addressid)
if individualid not in self.individu_mapping:
self.counters['relations_adresse.ignores'] += 1
self.counters['relations_adresse.ignores.individu_ignore'] += 1
continue
if streetnumber:
streetnumber = streetnumber.strip()
else:
                # RG10: replace missing street numbers with 0
self.counters['relations_adresse.RG10'] += 1
            # RG10 bis: strip the "chez" (care of) prefix
if at and at.lower().strip().startswith('chez'):
self.counters['relations_adresse.RG10bis'] += 1
at = at.strip()[4:].strip()
content = {
                # RG10: default the street number to zero when empty
'streetnumber': streetnumber or '0',
'streetnumberext': streetnumberext,
'streetname': streetname,
'streetmatriculation': streetmatriculation,
'ext1': ext1,
'ext2': ext2,
'at': at,
'city': city,
'zipcode': zipcode,
'country': country,
'inseecode': inseecode
}
e = Entity(created=self.tr, schema=adresse_schema, content=content)
adresse_batch.append(e)
adresse_mapping[addressid] = e
if individualid not in self.individu_mapping:
self.counters['relations_adresse.ignores.individu_ignore'] += 1
continue
self.counters['relations_adresse.importes'] += 1
if is_primary:
self.counters['relations_adresse.importes.principale'] += 1
individu_adresse_mapping[(individualid, addressid)] = is_primary
self.counters['adresses.importes'] = len(adresse_mapping)
self.counters['adresses.ignores.individu_ignore'] = len(
adresse_seen - set(m[1] for m in individu_adresse_mapping))
Entity.objects.bulk_create(adresse_batch)
relation_batch = []
for (a, b), is_primary in individu_adresse_mapping.items():
content = {
'principale': is_primary,
}
e = Relation(created=self.tr,
left=self.individu_mapping[a],
right=adresse_mapping[b],
schema=habite_schema,
content=content)
relation_batch.append(e)
Relation.objects.bulk_create(relation_batch)
@log('Loading relations... ')
def load_relations(self):
responsabilite_legale_schema = RelationSchema.objects.get(slug=RESPONSABILITE_LEGALE_REL)
union_schema = RelationSchema.objects.get(slug=UNION_REL)
        # create the relations between individuals
self.cursor.execute('''
SELECT
label,
relationtype,
responsibleid,
subjectid
FROM individualrelation''')
relation_batch = []
seen_situation_familiale = set()
for label, relationtype, responsibleid, subjectid in self.cursor.fetchall():
self.counters['relations'] += 1
self.counters['relations.%s' % relationtype] += 1
self.counters['relations.%s.%s' % (relationtype, label)] += 1
ignore = False
if relationtype == 'SituationFamiliale':
schema = union_schema
if label == 'Marie':
kind = 'pacs/mariage'
elif label == 'Pacse':
kind = 'pacs/mariage'
elif label == 'UnionLibre':
kind = 'unionlibre'
else:
raise NotImplementedError('unknown label for relationtype: %s, %s'
% (label, relationtype))
t = (label, min(responsibleid, subjectid), max(responsibleid, subjectid))
if t in seen_situation_familiale:
self.counters['relations.%s.ignores.doublon_symetrique' % relationtype] += 1
self.counters['relations.%s.%s.ignores.doublon_symetrique' % (
relationtype, label)] += 1
self.counters['relations.ignores.doublon_symetrique'] += 1
ignore = True
else:
seen_situation_familiale.add(t)
content = {'statut': kind}
elif relationtype == 'ResponsabiliteLegale':
schema = responsabilite_legale_schema
if label == 'Parent':
kind = 'parent'
elif label == 'TiersDeConfiance':
kind = 'tiers_de_confiance'
elif label == 'RepresentantPersonneMoraleQualifiee':
kind = 'representant_personne_morale_qualifiee'
elif label == 'Tuteur':
                    # convert guardianship (Tuteur) into trusted third party
self.counters['relations_modifies.RG7bis'] += 1
kind = 'tiers_de_confiance'
elif label == 'Curatelle':
self.counters['relations.%s.ignores.RG7' % relationtype] += 1
self.counters['relations.%s.%s.ignores.RG7' % (relationtype, label)] += 1
                    self.counters['relations.ignores.RG7'] += 1
ignore = True
else:
raise NotImplementedError('unknown label for relationtype: %s, %s'
% (label, relationtype))
content = {'statut': kind}
if responsibleid not in self.individu_mapping or subjectid not in self.individu_mapping:
self.counters['relations.%s.ignores.individu_ignore' % relationtype] += 1
self.counters['relations.%s.%s.ignores.individu_ignore' % (
relationtype, label)] += 1
self.counters['relations.ignores.individu_ignore'] += 1
ignore = True
if ignore:
self.counters['relations.ignores'] += 1
continue
if relationtype == 'ResponsabiliteLegale' and label == 'Tuteur':
self.counters['relations_modifies.RG7bis.importes'] += 1
self.counters['relations_importes'] += 1
self.counters['relations_importes.%s' % relationtype] += 1
self.counters['relations_importes.%s.%s' % (relationtype, kind)] += 1
e = Relation(created=self.tr, schema=schema, left=self.individu_mapping[responsibleid],
right=self.individu_mapping[subjectid], content=content)
relation_batch.append(e)
Relation.objects.bulk_create(relation_batch)
def is_majeur(individu):
return individu.content['statut_legal'] == 'majeur'
def is_mineur(individu):
return individu.content['statut_legal'] == 'mineur'
def get_application(application):
app_dfn = settings.ZOO_NANTERRE_APPLICATIONS.get(application)
assert app_dfn, 'application %s does not exist' % application
return app_dfn
def get_applications(**kwargs):
def helper():
for app_id, app_dfn in settings.ZOO_NANTERRE_APPLICATIONS.items():
for key, value in kwargs.items():
if value is True and not app_dfn.get(key):
continue
yield app_id
return list(helper())
def journalize(individu, transaction=None, meta=None, **kwargs):
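    '''Record a Log entry attached to `individu` with the given transaction;
    keyword arguments become the log content, `meta` is stored under the
    "meta" key.'''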
content = kwargs
if meta:
content['meta'] = meta
Log.objects.create(
entity=individu,
transaction=transaction,
content=content)
# Saga sequence helper
def set_saga_sequence(value):
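    '''Create the zoo_nanterre_saga_seq sequence if it does not exist and set
    its next value to `value`.'''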
cursor = connection.cursor()
cursor.execute('SELECT 1 FROM information_schema.sequences '
'WHERE sequence_schema = current_schema() '
'AND sequence_name = \'zoo_nanterre_saga_seq\'')
if not cursor.fetchall():
cursor.execute('CREATE SEQUENCE zoo_nanterre_saga_seq START %d' % value)
cursor.execute('SELECT setval(\'zoo_nanterre_saga_seq\', %d, false)' % value)
def get_next_saga_sequence():
cursor = connection.cursor()
cursor.execute('SELECT nextval(\'zoo_nanterre_saga_seq\')')
return cursor.fetchone()[0]
def pair_sort(a, b):
return (a, b) if a < b else (b, a)
def individu_caption(individu):
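    '''Return a short caption, e.g. "PRENOMS NOM_D_USAGE (NOM_DE_NAISSANCE) -
    date_de_naissance", omitting the empty parts.'''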
c = individu.content
s = c['nom_de_naissance']
if c['nom_d_usage']:
if s:
s = u' (%s)' % s
s = c['nom_d_usage'] + s
if c['prenoms']:
if s:
s = ' ' + s
s = c['prenoms'] + s
if c['date_de_naissance']:
s += u' - ' + c['date_de_naissance']
return s
def csv_export_response(rows, filename):
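    '''Return the rows as a text/csv HttpResponse served as an attachment
    named `filename` (compatible with both Python 2 and Python 3).'''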
if six.PY3:
with io.StringIO(newline='') as f:
writer = csv.writer(f)
for row in rows:
writer.writerow(map(str, row))
r = HttpResponse(f.getvalue(), content_type='text/csv')
else:
with io.BytesIO() as f:
writer = csv.writer(f)
for row in rows:
writer.writerow(map(force_bytes, row))
r = HttpResponse(f.getvalue(), content_type='text/csv')
r['Content-Disposition'] = 'attachment; filename="%s"' % filename
return r
@transaction.atomic
def passage_a_la_majorite():
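    '''Switch every non adult born at least eighteen years ago to the
    "majeur" legal status: log the change, give them their own copy of their
    first address (or a placeholder address when they had none), notify the
    applications and delete the responsabilite-legale relations with their
    parents.  Return a summary stored in the transaction content.'''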
from . import fragments
if not getattr(settings, 'ZOO_NANTERRE_PASSAGE_A_LA_MAJORITE', False):
return
transaction = Transaction.get_transaction()
transaction.content = {
'action': 'passage-a-la-majorite',
}
entity_schema = EntitySchema.objects.get(slug=INDIVIDU_ENT)
responsabilite_legale_schema = RelationSchema.objects.get(slug=RESPONSABILITE_LEGALE_REL)
non_majeurs = Entity.objects.filter(schema=entity_schema).exclude(content__statut_legal='majeur')
birthdate_threshold = now().date() - relativedelta(years=18)
transaction.content['birthdate_threshold'] = str(birthdate_threshold)
non_majeurs_to_update = non_majeurs.filter(content__date_de_naissance__timestamp__lte=birthdate_threshold)
updated_entities = non_majeurs_to_update.count()
# delete responsabilite-legale relations with parents
relations_to_delete = list(Relation.objects.filter(
schema=responsabilite_legale_schema,
right__in=non_majeurs_to_update))
deleted_relations = len(relations_to_delete)
schema_adresse = EntitySchema.objects.get(slug=ADRESSE_ENT)
habite_schema = RelationSchema.objects.get(slug=HABITE_REL)
errors = {}
# updating
for individu in non_majeurs_to_update:
individu.content['statut_legal'] = 'majeur'
individu.save()
        # compute the new address, reusing the first existing one
        adrs = adresses(individu)
        # parents() is a generator: materialize it, it is iterated twice below
        _parents = list(parents(individu))
        relations_parentales = [rel for parent, rel in _parents]
journalize(
individu,
old_adresses=[adr[0].pk for adr in adrs],
old_parents=[parent.pk for parent, rel in _parents],
transaction=transaction,
text=u'Passage à la majorité')
if adrs:
new_adresse, new_adresse_rel = adrs[0]
# delete link to old addresses
for _0, rel in adrs:
rel.delete()
# create new address
new_adresse.pk = None
new_adresse.created = transaction
new_adresse.save()
assert new_adresse.pk
            # link the new address to the former minor
new_adresse_rel.pk = None
new_adresse_rel.created = transaction
new_adresse_rel.content['principale'] = False
new_adresse_rel.right = new_adresse
new_adresse_rel.save()
else:
# set unknown address
new_adresse = Entity.objects.create(
schema=schema_adresse,
created=transaction,
content={
"at": "",
"city": "NANTERRE",
"ext1": "",
"ext2": "",
"country": "FR",
"zipcode": "92000",
"inseecode": "92050",
"streetname": "VOIE INCONNUE",
"streetnumber": "0",
"streetnumberext": "",
"streetmatriculation": ""
})
new_adresse_rel = Relation.objects.create(
created=transaction,
left=individu,
right=new_adresse,
schema=habite_schema,
content={
'principale': False,
})
adrs = adresses(individu)
assert len(adrs) == 1, '%s has %d adresses' % (individu, len(adrs))
# launch messages to applications
fragments.PassageALaMajorite.pour_chaque_application(individu, relations_parentales)
# delete relations
for relation in relations_to_delete:
relation.delete()
    # there cannot be deleted relations when no entity was updated
assert updated_entities or (not deleted_relations)
result = transaction.content['result'] = {
'updated_entities': updated_entities,
'deleted_relations': deleted_relations,
'errors': errors,
}
transaction.save()
return result