def sign_url(url, key, algo='sha256', timestamp=None, nonce=None):
    """Return ``url`` with a signed query string.

    The query component of ``url`` is replaced by the result of
    :func:`sign_query`; every other URL component is left untouched.

    :param url: URL to sign.
    :param key: shared secret (``str`` or ``bytes``).
    :param algo: name of the :mod:`hashlib` digest used for the HMAC.
    :param timestamp: ``datetime`` to embed; defaults to the current UTC time.
    :param nonce: nonce to embed; defaults to a random 128-bit hex string.
    """
    parsed = urlparse.urlparse(url)
    new_query = sign_query(parsed.query, key, algo, timestamp, nonce)
    return urlparse.urlunparse(parsed[:4] + (new_query,) + parsed[5:])


def sign_query(query, key, algo='sha256', timestamp=None, nonce=None):
    """Append ``algo``, ``timestamp``, ``nonce`` and ``signature`` to ``query``.

    The signature is the base64-encoded HMAC of the query string as it stands
    just before the ``signature`` parameter is appended.

    :param query: existing query string (may be empty).
    :param key: shared secret (``str`` or ``bytes``).
    :param algo: name of the :mod:`hashlib` digest used for the HMAC.
    :param timestamp: ``datetime`` to embed; defaults to the current UTC time.
        Timezone-aware values are converted to UTC first.
    :param nonce: nonce to embed; defaults to a random 128-bit hex string.
    :returns: the signed query string.
    """
    if timestamp is None:
        timestamp = datetime.datetime.now(datetime.timezone.utc)
    elif timestamp.tzinfo is not None:
        # The serialized form is suffixed with "Z", so it must really be UTC.
        timestamp = timestamp.astimezone(datetime.timezone.utc)
    timestamp = timestamp.strftime('%Y-%m-%dT%H:%M:%SZ')
    if nonce is None:
        # Fixed-width hex; slicing hex() with [2:-1] was a Python 2 idiom
        # (trailing "L") that drops a real digit on Python 3.
        nonce = format(random.SystemRandom().getrandbits(128), '032x')
    new_query = query
    if new_query:
        new_query += '&'
    new_query += urlparse.urlencode((
        ('algo', algo),
        ('timestamp', timestamp),
        ('nonce', nonce)))
    signature = base64.b64encode(sign_string(new_query, key, algo=algo))
    new_query += '&signature=' + urlparse.quote(signature)
    return new_query


def sign_string(s, key, algo='sha256', timedelta=30):
    """Return the raw HMAC digest (``bytes``) of ``s`` under ``key``.

    ``str`` values of ``s`` and ``key`` are encoded as UTF-8.  ``timedelta``
    is unused; it is kept so existing callers keep working.

    :raises AttributeError: if ``algo`` is not an attribute of :mod:`hashlib`.
    """
    digestmod = getattr(hashlib, algo)
    if isinstance(s, str):
        s = s.encode('utf-8')
    if isinstance(key, str):
        key = key.encode('utf-8')
    return hmac.new(key, msg=s, digestmod=digestmod).digest()


def check_url(url, key, known_nonce=None, timedelta=30):
    """Return ``True`` if the query string of ``url`` carries a valid signature.

    ``known_nonce`` and ``timedelta`` are forwarded to :func:`check_query`.
    """
    parsed = urlparse.urlparse(url, 'https')
    return check_query(parsed.query, key, known_nonce=known_nonce, timedelta=timedelta)


def check_query(query, key, known_nonce=None, timedelta=30):
    """Validate a query string produced by :func:`sign_query`.

    :param query: the signed query string.
    :param key: shared secret (``str`` or ``bytes``).
    :param known_nonce: optional callable receiving the nonce (a ``str``);
        a truthy return value marks the nonce as already seen and the query
        is rejected.
    :param timedelta: maximum accepted clock difference, in seconds.
    :returns: ``True`` when the signature is valid, fresh and not replayed;
        ``False`` otherwise, including when a parameter is missing or
        malformed.
    """
    parsed = urlparse.parse_qs(query)
    if any(name not in parsed for name in ('signature', 'algo', 'timestamp', 'nonce')):
        return False
    unsigned_query, _, trailer = query.partition('&signature=')
    if '&' in trailer:
        # Anything after the signature is not covered by it: reject.
        return False
    algo = parsed['algo'][0]
    # ``algo`` comes from the request; only accept real digest constructors
    # (the shake_* digests need an explicit length and cannot be used here).
    if algo not in hashlib.algorithms_guaranteed or algo.startswith('shake_'):
        return False
    try:
        signature = base64.b64decode(parsed['signature'][0])
        timestamp = datetime.datetime.strptime(
            parsed['timestamp'][0], '%Y-%m-%dT%H:%M:%SZ')
    except ValueError:  # binascii.Error is a ValueError subclass
        return False
    nonce = parsed['nonce'][0]
    if known_nonce is not None and known_nonce(nonce):
        return False
    # strptime() yields a naive datetime, so compare against naive UTC "now".
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    if abs(now - timestamp) > datetime.timedelta(seconds=timedelta):
        return False
    return check_string(unsigned_query, signature, key, algo=algo)


def check_string(s, signature, key, algo='sha256'):
    """Return ``True`` if ``signature`` (``bytes``) is the HMAC of ``s``.

    The comparison is delegated to :func:`hmac.compare_digest` so that it
    runs in constant time.
    """
    expected = sign_string(s, key, algo=algo)
    return hmac.compare_digest(signature, expected)
logger = logging.getLogger(__name__)


class WcsApiError(Exception):
    """Raised when a w.c.s. API request fails or returns invalid JSON."""


class JSONFile(object):
    """Read-only view over a file serialized as a JSON dictionary."""

    def __init__(self, d):
        self.d = d

    @property
    def filename(self):
        """File name, or an empty string when the key is absent."""
        return self.d.get('filename', '')

    @property
    def content_type(self):
        """MIME type, defaulting to ``application/octet-stream``."""
        return self.d.get('content_type', 'application/octet-stream')

    @property
    def content(self):
        """Decoded bytes of the base64 ``content`` key."""
        return base64.b64decode(self.d['content'])


class BaseObject(object):
    """Generic object whose attributes are taken from keyword arguments."""

    def __init__(self, wcs_api, **kwargs):
        self._wcs_api = wcs_api
        self.__dict__.update(**kwargs)


class FormDataWorkflow(BaseObject):
    """Workflow part of a form data: current status and workflow fields."""

    status = None
    fields = None

    def __init__(self, wcs_api, **kwargs):
        super().__init__(wcs_api, **kwargs)
        if self.status is not None:
            self.status = BaseObject(wcs_api, **self.status)
        self.fields = self.fields or {}


class EvolutionUser(BaseObject):
    """User referenced by an evolution entry."""

    id = None
    name = None
    NameID = None
    email = None


class Evolution(BaseObject):
    """One entry of a form data history."""

    who = None
    status = None
    parts = None

    def __init__(self, wcs_api, **kwargs):
        super().__init__(wcs_api, **kwargs)
        self.time = isodate.parse_datetime(self.time)
        if self.parts:
            self.parts = [BaseObject(wcs_api, **part) for part in self.parts]
        if self.who:
            self.who = EvolutionUser(wcs_api, **self.who)


class FormData(BaseObject):
    """A form submission as returned by the forms API."""

    geolocations = None
    evolution = None
    submission = None
    submissions = None  # kept for backward compatibility; not read here
    workflow = None
    roles = None

    def __init__(self, wcs_api, **kwargs):
        super().__init__(wcs_api, **kwargs)
        self.receipt_time = isodate.parse_datetime(self.receipt_time)
        # The test used to read ``submissions`` while wrapping ``submission``,
        # so the submission was never wrapped.
        if self.submission:
            self.submission = BaseObject(wcs_api, **self.submission)
        if self.workflow:
            self.workflow = FormDataWorkflow(wcs_api, **self.workflow)
        self.evolution = [Evolution(wcs_api, **evo) for evo in self.evolution or []]
        self.functions = {}
        self.concerned_roles = []
        self.action_roles = []
        for function, raw_roles in (self.roles or {}).items():
            roles = [Role(wcs_api, **r) for r in raw_roles]
            if function == 'concerned':
                self.concerned_roles.extend(roles)
            elif function == 'actions':
                # Used to be appended to concerned_roles, which left
                # action_roles permanently empty.
                self.action_roles.extend(roles)
            else:
                # Any other function keeps its first role only, or None.
                self.functions[function] = roles[0] if roles else None
        if 'roles' in self.__dict__:
            del self.roles
        self.with_files = False

    def __repr__(self):
        return '<{klass} {display_id!r}>'.format(klass=self.__class__.__name__,
                                                 display_id=self.id)

    @property
    def full_fields(self):
        """Fields of the form data, fetched from the per-form URL on first use."""
        if not self.with_files:
            data = self._wcs_api.get_json(
                self._wcs_api.forms_url,
                self.formdef_slug + '/',
                str(self.id) + '/')
            self.fields = data['fields']
            self.with_files = True
        return self.fields

    @property
    def endpoint_delay(self):
        """Delay between receipt and the entry into the trailing endpoint statuses.

        The history is walked backwards.  While statuses are endpoints, the
        delay of the earliest one seen is remembered; it is returned as soon
        as a non-endpoint status is met.  ``None`` is returned when the most
        recent status is not an endpoint, when a status is unknown to the
        workflow, or when no non-endpoint status is found.
        """
        statuses_map = self.formdef.schema.workflow.statuses_map
        last = None
        for evo in reversed(self.evolution):
            if not evo.status:
                continue
            try:
                status = statuses_map[evo.status]
            except KeyError:  # happens when the workflow has changed
                return None
            if not status.endpoint:
                return last
            last = evo.time - self.receipt_time
        return None

    def __getitem__(self, key):
        """Return the value of field ``key``, wrapping files in :class:`JSONFile`."""
        value = self.full_fields.get(key)
        if not value:
            return value
        # unserialize files
        if isinstance(value, dict) and 'content' in value:
            return JSONFile(value)
        return value


class Workflow(BaseObject):
    """Workflow of a form definition: ordered statuses and workflow fields."""

    statuses = None
    fields = None

    def __init__(self, wcs_api, **kwargs):
        super().__init__(wcs_api, **kwargs)
        self.statuses = [BaseObject(wcs_api, **v) for v in (self.statuses or [])]
        # A workflow without statuses used to raise IndexError here.
        if self.statuses:
            assert not hasattr(self.statuses[0], 'startpoint'), \
                'startpoint is exported by w.c.s. FIXME'
            for status in self.statuses:
                status.startpoint = False
            # The first status is flagged as the start point.
            self.statuses[0].startpoint = True
        self.statuses_map = {s.id: s for s in self.statuses}
        self.fields = [Field(wcs_api, **field) for field in (self.fields or [])]


class Field(BaseObject):
    """Field of a form definition or of a workflow."""

    items = None
    options = None
    varname = None
    in_filters = False
    anonymise = None


class Schema(BaseObject):
    """Schema of a form definition."""

    category_id = None
    category = None
    geolocations = None

    def __init__(self, wcs_api, **kwargs):
        super().__init__(wcs_api, **kwargs)
        self.workflow = Workflow(wcs_api, **self.workflow)
        self.fields = [Field(wcs_api, **f) for f in self.fields]
        self.geolocations = sorted((k, v) for k, v in (self.geolocations or {}).items())


class FormDef(BaseObject):
    """A form definition listed by the formdefs API."""

    geolocations = None

    def __init__(self, wcs_api, **kwargs):
        super().__init__(wcs_api, **kwargs)

    def __unicode__(self):
        # NOTE(review): only honoured by Python 2; str() falls back to __repr__.
        return self.title

    @property
    def datas(self):
        """Yield the form datas of this form definition, each linked back to it."""
        datas = self._wcs_api.get_formdatas(self.slug, full=False)
        for data in datas:
            data.formdef = self
            yield data

    @property
    def schema(self):
        """Schema of this form definition, as returned by ``WcsApi.get_schema``."""
        return self._wcs_api.get_schema(self.slug)

    def __repr__(self):
        return '<{klass} {slug!r}>'.format(klass=self.__class__.__name__, slug=self.slug)


class Role(BaseObject):
    pass


class Category(BaseObject):
    pass


class WcsApi(object):
    """Client for the signed JSON API rooted at ``url``.

    :param url: base URL of the w.c.s. instance.
    :param orig: value sent as the ``orig`` query parameter.
    :param key: secret used to sign requests.
    :param name_id: optional value sent as the ``NameID`` query parameter.
    :param verify: passed to ``requests.get`` as ``verify``.
    :param slugs: optional list restricting :attr:`formdefs` to these slugs.
    :param batch_size: page size used by :meth:`get_formdatas`.
    """

    def __init__(self, url, orig, key, name_id=None, verify=True, slugs=None, batch_size=1000):
        self.url = url
        self.orig = orig
        self.key = key
        self.verify = verify
        self.cache = {}
        self.slugs = slugs or []
        self.batch_size = batch_size
        self.name_id = name_id

    @property
    def formdefs_url(self):
        return urlparse.urljoin(self.url, 'api/formdefs/')

    @property
    def forms_url(self):
        return urlparse.urljoin(self.url, 'api/forms/')

    @property
    def roles_url(self):
        return urlparse.urljoin(self.url, 'api/roles')

    def build_url(self, url_parts):
        """Join ``url_parts`` left to right with ``urljoin``."""
        url = url_parts[0]
        for url_part in url_parts[1:]:
            url = urlparse.urljoin(url, url_part)
        return url

    def get_json(self, *url_parts):
        """GET the URL built from ``url_parts`` and return the decoded JSON.

        Responses are cached per unsigned URL for the lifetime of the instance.

        :raises WcsApiError: if the request fails or the body is not JSON.
        """
        url = self.build_url(url_parts)
        params = {'orig': self.orig}
        if self.name_id:
            params['NameID'] = self.name_id
        query_string = urlparse.urlencode(params)
        presigned_url = url + ('&' if '?' in url else '?') + query_string
        # Cache on the unsigned URL: the signed one differs on every call.
        if presigned_url in self.cache:
            return self.cache[presigned_url]
        signed_url = signature.sign_url(presigned_url, self.key)
        try:
            response = requests.get(signed_url, verify=self.verify)
            response.raise_for_status()
        except requests.RequestException as e:
            raise WcsApiError('GET request failed', signed_url, e) from e
        try:
            content = response.json()
        except ValueError as e:
            raise WcsApiError('Invalid JSON content', signed_url, e) from e
        self.cache[presigned_url] = content
        return content

    @property
    def roles(self):
        return [Role(wcs_api=self, **d) for d in self.get_json(self.roles_url)['data']]

    @property
    def formdefs(self):
        """Form definitions, restricted to ``self.slugs`` when it is not empty.

        An error payload (``err`` different from 0) is logged and yields an
        empty list.
        """
        result = self.get_json(self.formdefs_url)
        if isinstance(result, dict):
            if result['err'] != 0:
                logger.error('could not retrieve formdefs from %s, err_desc: %s',
                             self.formdefs_url, result.get('err_desc'))
                return []
            data = result['data']
        else:
            data = result
        return [FormDef(wcs_api=self, **d) for d in data
                if not self.slugs or d['slug'] in self.slugs]

    @property
    def categories(self):
        """Categories referenced by the schemas of the form definitions."""
        d = {}
        for f in self.formdefs:
            schema = f.schema
            if hasattr(schema, 'category'):
                d[schema.category_id] = schema.category
        return [Category(wcs_api=self, id=k, name=v) for k, v in d.items()]

    def get_formdatas(self, slug, full=True):
        """Yield the form datas of form ``slug``, fetched ``batch_size`` at a time."""
        offset = 0
        limit = self.batch_size
        while True:
            data = self.get_json(
                self.forms_url,
                slug + '/list?anonymise&full=%s&offset=%d&limit=%d' % (
                    'on' if full else 'off', offset, limit))
            for d in data:
                # w.c.s. had a bug where some formdata lost their draft status, skip them
                if not d.get('receipt_time'):
                    continue
                yield FormData(wcs_api=self, formdef_slug=slug, **d)
            # A short page is the last page.
            if len(data) < limit:
                break
            offset += limit

    def get_schema(self, slug):
        json_schema = self.get_json(self.formdefs_url, slug + '/', 'schema?anonymise')
        return Schema(wcs_api=self, **json_schema)
print('Moving files into', target_dir)

# Each form data is first assembled under <target>/tmp and then renamed into
# place, so a directory under <target>/<DIRECTORY> is always complete.
tmp_dir = os.path.join(target_dir, 'tmp')
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

for formdef in api.formdefs:
    settings = FORMDEFS.get(formdef.slug)
    if settings is None:
        # Only the form definitions listed in FORMDEFS are exported.
        continue
    print(formdef.title)
    MAPPINGS = settings['MAPPINGS']
    DIRECTORY = settings['DIRECTORY']
    formdef_dir = os.path.join(target_dir, DIRECTORY)
    if not os.path.exists(formdef_dir):
        os.makedirs(formdef_dir)
    for formdata in formdef.datas:
        formdata_name = str(formdata.id)
        formdata_dir = os.path.join(formdef_dir, formdata_name)
        formdata_tmp_dir = os.path.join(tmp_dir, DIRECTORY, formdata_name)

        # An existing directory means this form data was already exported.
        if os.path.exists(formdata_dir):
            continue
        try:
            # Start from an empty staging directory.
            shutil.rmtree(formdata_tmp_dir, True)
            os.makedirs(formdata_tmp_dir)
            print(' Form', formdata.id)
            for name, key, filename in MAPPINGS:
                value = formdata[key]
                if not value:
                    continue
                print(' -', name, ':', value.filename)
                # Keep the configured base name with the uploaded file's extension.
                stem = os.path.splitext(filename)[0]
                extension = os.path.splitext(value.filename)[1]
                attachment_path = os.path.join(formdata_tmp_dir, stem + extension)
                print('Putting in', attachment_path)
                with open(attachment_path, 'wb') as f:
                    f.write(value.content)
            os.rename(formdata_tmp_dir, formdata_dir)
        finally:
            # Remove the staging directory if the rename did not happen.
            shutil.rmtree(formdata_tmp_dir, True)
    # NOTE(review): nesting level of this print() is inferred; the source
    # had lost its indentation -- confirm against the original script.
    print()