177 lines
6.9 KiB
Python
177 lines
6.9 KiB
Python
# barbacompta - accounting for dummies
|
|
# Copyright (C) 2010-2019 Entr'ouvert
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify it
|
|
# under the terms of the GNU Affero General Public License as published
|
|
# by the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Affero General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
import itertools
|
|
import xml.etree.ElementTree as ET
|
|
import zipfile
|
|
from xml.dom import pulldom
|
|
|
|
import requests
|
|
from django.core.files.storage import default_storage
|
|
|
|
from . import chorus
|
|
|
|
|
|
def grouper(iterable, n, fillvalue=None):
    """Collect data into fixed-length chunks or blocks.

    Returns an iterator of ``n``-tuples holding consecutive items of
    ``iterable``; the last tuple is padded with ``fillvalue`` when the input
    length is not a multiple of ``n``.

    Example: grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx
    """
    shared = iter(iterable)
    # Every positional slot is the *same* iterator, so zip_longest pulls n
    # consecutive items to build each tuple.
    return itertools.zip_longest(*itertools.repeat(shared, n), fillvalue=fillvalue)
|
|
|
|
|
|
class AnnuaireManager:
    """Download the annuaire archive and mirror it into ``models.Structure``.

    The archive is fetched from ``chorus.get_annuaire_url()``, cached in the
    default storage as ``annuaire.zip`` (with its ETag in ``annuaire.etag``),
    parsed as XML and turned into ``models.Structure`` instances.
    """

    STRUCTURE_UNITAIRE_TAG_NAME = 'CPPStructurePartenaireUnitaire'
    # Number of parsed structures handled per database round-trip.
    BATCH_SIZE = 1000
    # Names are cut to this length before being handed to the model.
    # NOTE(review): presumably the max_length of Structure.name and
    # Structure.service_name - confirm against models.Structure.
    NAME_MAX_LENGTH = 80
    # Size of the chunks used to stream the archive to disk.
    DOWNLOAD_CHUNK_SIZE = 64 * 1024

    def _update_annuaire(self):
        """Synchronise ``models.Structure`` with the annuaire, batch by batch.

        This is a generator. After every batch it yields
        ``(count, insert_count, update_count, 0)`` with running totals, and
        once at the end ``(count, insert_count, update_count, deleted)`` after
        rows whose ``full_identifier`` was not seen have been deleted.
        """
        from . import models

        count = 0
        insert_count = 0
        update_count = 0
        known = set()
        for structures in grouper(self.download_and_parse_annuaire(), self.BATCH_SIZE):
            # grouper() pads the last batch with None: drop the padding.
            structures = [struct for struct in structures if struct]
            inserts = []
            updates = []
            identifiers = {structure.full_identifier for structure in structures}
            known.update(identifiers)
            # One query per batch to find the rows that already exist.
            known_structures = {
                struct.full_identifier: struct
                for struct in models.Structure.objects.filter(full_identifier__in=identifiers)
            }

            for structure in structures:
                known_structure = known_structures.get(structure.full_identifier)
                if known_structure:
                    # Only rewrite rows whose email or string representation changed.
                    if known_structure.email != structure.email or str(known_structure) != str(structure):
                        # Reusing the primary key makes save() update the existing row.
                        structure.id = known_structure.id
                        updates.append(structure)
                else:
                    inserts.append(structure)
            models.Structure.objects.bulk_create(inserts)
            for update in updates:
                update.save()
            count += len(structures)
            insert_count += len(inserts)
            update_count += len(updates)
            yield count, insert_count, update_count, 0
        # Everything in the database that was not present in the annuaire is obsolete.
        obsolete = set(models.Structure.objects.values_list('full_identifier', flat=True)) - known
        models.Structure.objects.filter(full_identifier__in=obsolete).delete()
        yield count, insert_count, update_count, len(obsolete)

    def update_annuaire(self):
        """Run the whole synchronisation, discarding the progress statistics."""
        for _stats in self._update_annuaire():
            pass

    def download_and_parse_annuaire(self):
        """Refresh the cached archive if needed and yield its structures.

        A conditional request (``If-None-Match``) is sent when both the cached
        archive and its ETag exist. On a 200 response the archive and ETag are
        replaced; on any other response the cached archive is used.

        Yields whatever ``parse_annuaire`` yields for every member of the zip.

        Raises:
            requests.HTTPError: the server answered with a 4xx/5xx error and
                there is no cached archive to fall back on.
        """
        import os

        has_cached_zip = default_storage.exists('annuaire.zip')
        etag = None

        if has_cached_zip and default_storage.exists('annuaire.etag'):
            with default_storage.open('annuaire.etag') as fd:
                etag = fd.read()

        headers = {}
        if etag:
            headers['If-None-Match'] = etag
        else:
            print('No etag')
        with requests.get(chorus.get_annuaire_url(), stream=True, headers=headers) as response:
            if response.status_code == 200:
                zip_path = default_storage.path('annuaire.zip')
                partial_path = zip_path + '.part'
                # Stream to a side file first: the cached archive is only
                # replaced once the download is complete, so a network failure
                # cannot leave a truncated zip next to a still-valid ETag.
                with open(partial_path, 'wb') as fd:
                    for chunk in response.iter_content(chunk_size=self.DOWNLOAD_CHUNK_SIZE):
                        fd.write(chunk)
                os.replace(partial_path, zip_path)
                with open(default_storage.path('annuaire.etag'), 'w') as fd:
                    # Without an ETag header an empty value is stored, which
                    # forces an unconditional download on the next run.
                    fd.write(response.headers.get('ETag', ''))
            else:
                if response.status_code != 304 and not has_cached_zip:
                    # Nothing to fall back on: report the HTTP error itself
                    # rather than a missing-file error further down.
                    response.raise_for_status()
                print('Using already downloaded file')

        with default_storage.open('annuaire.zip') as zip_fd:
            with zipfile.ZipFile(zip_fd) as zipf:
                for name in zipf.namelist():
                    with zipf.open(name) as fd:
                        yield from self.parse_annuaire(fd)

    def parse_annuaire(self, fd):
        """Yield unsaved ``models.Structure`` instances parsed from ``fd``.

        ``fd`` is a file-like object containing the annuaire XML. Inactive
        structures and inactive services are skipped. A structure that manages
        services yields one instance per active service, otherwise a single
        instance.
        """
        from . import models

        for structure in self._iter_raw_structures(fd):
            for kwargs in self._structure_kwargs(structure):
                yield models.Structure(**kwargs)

    def _iter_raw_structures(self, fd):
        """Yield one ``parse_structure`` dict per structure element found in ``fd``."""
        doc = pulldom.parse(fd)
        for event, node in doc:
            if event == pulldom.START_ELEMENT and node.tagName == self.STRUCTURE_UNITAIRE_TAG_NAME:
                # Only the current structure is expanded into a DOM subtree;
                # the rest of the document keeps being read as a stream.
                doc.expandNode(node)
                yield self.parse_structure(ET.fromstring(node.toxml()))

    def _structure_kwargs(self, structure):
        """Yield the ``models.Structure`` keyword arguments for one parsed structure."""
        if structure['StructureActive'] == 'false':
            return

        # An empty <AdressePostale/> is parsed as None instead of a dict.
        address = structure.get('AdressePostale')
        email = address.get('Courriel') if isinstance(address, dict) else None

        if structure['GestionService'] != 'true':
            yield {
                'name': self._truncate(structure['RaisonSociale'], self.NAME_MAX_LENGTH),
                'siret': structure['Identifiant'],
                'email': email,
                'engagement_obligatoire': structure['GestionEngagement'] == 'true',
            }
            return

        # 'Services' may be missing, empty (None), hold a single service (dict)
        # or several (list): normalise all of these to a list.
        services = structure.get('Services')
        services = services.get('Service') if isinstance(services, dict) else None
        if services is None:
            services = []
        elif not isinstance(services, list):
            services = [services]

        for service in services:
            # An empty <Service/> element is parsed as None: nothing to import.
            if not isinstance(service, dict) or service['ServiceActif'] == 'false':
                continue
            yield {
                'name': self._truncate(structure['RaisonSociale'], self.NAME_MAX_LENGTH),
                'siret': structure['Identifiant'],
                'service_code': service['Code'],
                'service_name': self._truncate(service['Nom'], self.NAME_MAX_LENGTH),
                'email': email,
                'engagement_obligatoire': (
                    service['GestionEGMT'] == 'true' or structure['GestionEngagement'] == 'true'
                ),
            }

    @staticmethod
    def _truncate(value, length):
        """Cut ``value`` to ``length`` characters, passing None/empty values through."""
        return value[:length] if value else value

    def parse_structure(self, structure):
        """Convert an ElementTree element into nested dicts.

        Each child becomes a key named after its tag. A child with children is
        converted recursively, a leaf gives its text (None when empty). A tag
        that occurs several times gives a list of values in document order.
        """
        d = {}
        for node in structure:
            value = self.parse_structure(node) if len(node) else node.text
            if node.tag not in d:
                d[node.tag] = value
            elif isinstance(d[node.tag], list):
                d[node.tag].append(value)
            else:
                # Second occurrence of the tag: switch from a scalar to a list.
                d[node.tag] = [d[node.tag], value]
        return d
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: run the synchronisation and show a progress line
    # that is rewritten in place after every batch.
    import django

    django.setup()

    manager = AnnuaireManager()
    progress_format = 'Analyzed %10d - Inserted %10d - Updated %10d - Deleted %10d\r'
    for stats in manager._update_annuaire():
        # stats is the (count, insert_count, update_count, delete_count) tuple;
        # the trailing '\r' with end='' keeps the cursor on the same line.
        print(progress_format % stats, end='')
    # Finish the progress line.
    print()
|