add cd06 scripts

This commit is contained in:
Benjamin Dauvergne 2018-11-06 18:20:57 +01:00
parent 003f3f8ea6
commit c13da9cb99
7 changed files with 502 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
cd06/config.py
cd06/Makefile.config

7
cd06/Makefile Normal file
View File

@ -0,0 +1,7 @@
# Local settings; SFTP_URL (used below) is presumably defined in this
# git-ignored file.
include Makefile.config

# Default target: export the form attachments into ./target, remove the
# scratch directory left under it, then push the tree to the remote
# "Recette" directory with lftp.
all:
	python3 script.py target
	rm -rf target/tmp/
	lftp -e 'mirror -e -R target Recette; exit' $(SFTP_URL)

3
cd06/cd06.cron.d Normal file
View File

@ -0,0 +1,3 @@
# Output of the job below is mailed to this address.
MAILTO=bdauvergne@entrouvert.com
# At minute 0 of every hour, as user bdauvergne: run the default make target
# from the cd06 checkout.
0 * * * * bdauvergne cd /home/bdauvergne/cd06; make

0
cd06/cd06/__init__.py Normal file
View File

74
cd06/cd06/signature.py Normal file
View File

@ -0,0 +1,74 @@
import datetime
import base64
import hmac
import hashlib
import urllib
import random
import urllib.parse as urlparse
'''Simple signature scheme for query strings'''
def sign_url(url, key, algo='sha256', timestamp=None, nonce=None):
    """Return *url* with its query string replaced by a signed one.

    Only the query component is touched: it is handed to ``sign_query``
    together with *key*, *algo*, *timestamp* and *nonce*, and the result is
    put back in place of the original query.
    """
    parts = urlparse.urlparse(url)
    signed_query = sign_query(parts.query, key, algo, timestamp, nonce)
    return urlparse.urlunparse(parts._replace(query=signed_query))
def sign_query(query, key, algo='sha256', timestamp=None, nonce=None):
    """Return *query* extended with signature parameters.

    ``algo``, ``timestamp`` and ``nonce`` are appended to *query*, the
    resulting string is signed with ``sign_string`` and the base64-encoded
    signature is appended as the last ``signature`` parameter.

    :param query: query string to sign (may be empty).
    :param key: shared secret passed to ``sign_string``.
    :param algo: name of the hashlib digest to use.
    :param timestamp: ``datetime`` to embed; defaults to the current UTC time.
    :param nonce: value to embed; defaults to 128 random bits in hexadecimal.
    :returns: the signed query string.
    """
    if timestamp is None:
        timestamp = datetime.datetime.utcnow()
    timestamp = timestamp.strftime('%Y-%m-%dT%H:%M:%SZ')
    if nonce is None:
        # Format the integer directly: slicing hex() with [2:-1] only made
        # sense for the trailing 'L' of Python 2 longs; on Python 3 it threw
        # away the last real digit of the nonce.
        nonce = '%032x' % random.SystemRandom().getrandbits(128)
    signature_params = urlparse.urlencode((
        ('algo', algo),
        ('timestamp', timestamp),
        ('nonce', nonce)))
    new_query = query + '&' + signature_params if query else signature_params
    # The signature covers everything that precedes '&signature='.
    signature = base64.b64encode(sign_string(new_query, key, algo=algo))
    new_query += '&signature=' + urlparse.quote(signature)
    return new_query
def sign_string(s, key, algo='sha256', timedelta=30):
    """Return the raw HMAC digest of *s* keyed with *key*.

    *algo* names the ``hashlib`` constructor used as digest. ``str`` values
    of *s* and *key* are encoded as UTF-8 first; other values are passed to
    ``hmac`` unchanged. *timedelta* is accepted but not used.
    """
    digest_factory = getattr(hashlib, algo)
    message = s.encode('utf-8') if isinstance(s, str) else s
    secret = key.encode('utf-8') if isinstance(key, str) else key
    return hmac.new(secret, msg=message, digestmod=digest_factory).digest()
def check_url(url, key, known_nonce=None, timedelta=30):
    """Check the signature carried by the query string of *url*.

    :param url: signed URL.
    :param key: shared secret used to recompute the signature.
    :param known_nonce: optional callable forwarded to ``check_query``.
    :param timedelta: accepted clock difference in seconds, forwarded to
        ``check_query``.
    :returns: the result of ``check_query`` on the URL's query string.
    """
    parsed = urlparse.urlparse(url, 'https')
    # Forward the replay/time-window options: they used to be dropped here,
    # so check_query always ran with its own defaults.
    return check_query(parsed.query, key, known_nonce=known_nonce, timedelta=timedelta)
def check_query(query, key, known_nonce=None, timedelta=30):
    """Check a query string produced by ``sign_query``.

    :param query: signed query string; the signed part is everything that
        precedes ``&signature=``.
    :param key: shared secret used to recompute the signature.
    :param known_nonce: optional callable receiving the nonce string; a true
        result rejects the query.
    :param timedelta: maximum accepted difference, in seconds, between the
        embedded timestamp and the current UTC time.
    :returns: ``True`` when the query is fresh and correctly signed,
        ``False`` otherwise (including when a signature parameter is missing
        or malformed).
    """
    parsed = urlparse.parse_qs(query)
    try:
        signature = base64.b64decode(parsed['signature'][0])
        algo = parsed['algo'][0]
        timestamp = datetime.datetime.strptime(
            parsed['timestamp'][0], '%Y-%m-%dT%H:%M:%SZ')
        # parse_qs returns lists: hand the nonce itself, not the list, to
        # known_nonce.
        nonce = parsed['nonce'][0]
    except (KeyError, ValueError):
        # Missing parameter, bad base64 or bad timestamp: not a valid
        # signature.
        return False
    unsigned_query = query.split('&signature=')[0]
    if known_nonce is not None and known_nonce(nonce):
        return False
    age = abs(datetime.datetime.utcnow() - timestamp)
    if age > datetime.timedelta(seconds=timedelta):
        return False
    return check_string(unsigned_query, signature, key, algo=algo)
def check_string(s, signature, key, algo='sha256'):
    """Return whether *signature* is the valid signature of *s*.

    The expected value is recomputed with ``sign_string`` and compared to
    *signature* (raw digest bytes) in constant time.
    """
    expected = sign_string(s, key, algo=algo)
    # hmac.compare_digest replaces the hand-written loop, which called ord()
    # on the ints produced by iterating bytes and so raised TypeError on
    # Python 3. It also copes with operands of different lengths.
    return hmac.compare_digest(expected, signature)

318
cd06/cd06/wcs_api.py Normal file
View File

@ -0,0 +1,318 @@
import requests
import urllib.parse as urlparse
import urllib
import isodate
import logging
import base64
from . import signature
logger = logging.getLogger(__name__)
class WcsApiError(Exception):
    """Error raised when a request to the w.c.s. API cannot be completed."""
class JSONFile:
    """Read-only view over a file serialized as a JSON dictionary."""

    def __init__(self, d):
        # Raw dictionary as found in the API payload.
        self.d = d

    @property
    def filename(self):
        """File name from the payload, or an empty string when absent."""
        payload = self.d
        return payload.get('filename', '')

    @property
    def content_type(self):
        """MIME type from the payload, ``application/octet-stream`` by default."""
        payload = self.d
        return payload.get('content_type', 'application/octet-stream')

    @property
    def content(self):
        """File bytes, decoded from the base64 ``content`` entry."""
        encoded = self.d['content']
        return base64.b64decode(encoded)
class BaseObject:
    """Generic API object: every keyword argument becomes an attribute."""

    def __init__(self, wcs_api, **kwargs):
        # Keep a handle on the API client so objects can issue further requests.
        self._wcs_api = wcs_api
        vars(self).update(kwargs)
class FormDataWorkflow(BaseObject):
    """Workflow section of a form data payload."""
    status = None
    fields = None

    def __init__(self, wcs_api, **kwargs):
        super().__init__(wcs_api, **kwargs)
        raw_status = self.status
        if raw_status is not None:
            # Wrap the status dictionary so its keys read as attributes.
            self.status = BaseObject(wcs_api, **raw_status)
        if not self.fields:
            # Missing or empty fields are normalized to an empty dict.
            self.fields = {}
class EvolutionUser(BaseObject):
    """User attached to an evolution entry; absent attributes read as None."""
    id = name = NameID = email = None
class Evolution(BaseObject):
    """One entry of a form data's ``evolution`` list.

    The ``time`` keyword is required and is parsed into a datetime; ``parts``
    and ``who``, when present and non-empty, are wrapped in objects.
    """
    who = None
    status = None
    parts = None

    def __init__(self, wcs_api, **kwargs):
        super().__init__(wcs_api, **kwargs)
        self.time = isodate.parse_datetime(self.time)
        raw_parts = self.parts
        if raw_parts:
            self.parts = [BaseObject(wcs_api, **raw) for raw in raw_parts]
        author = self.who
        if author:
            self.who = EvolutionUser(wcs_api, **author)
class FormData(BaseObject):
    """A submitted form, built from one entry of the w.c.s. form list.

    Nested dictionaries (``submission``, ``workflow``, ``evolution`` and
    ``roles``) are converted to objects. Indexing the instance
    (``formdata[varname]``) returns the matching field value, fetching the
    complete form on first access.
    """
    geolocations = None
    evolution = None
    submission = None
    # Historical misspelling of ``submission``, kept as an attribute for
    # backward compatibility.
    submissions = None
    workflow = None
    roles = None

    def __init__(self, wcs_api, **kwargs):
        super(FormData, self).__init__(wcs_api, **kwargs)
        self.receipt_time = isodate.parse_datetime(self.receipt_time)
        # Test the attribute that is actually converted: the guard used to
        # look at ``submissions`` while wrapping ``submission``.
        if self.submission:
            self.submission = BaseObject(wcs_api, **self.submission)
        if self.workflow:
            self.workflow = FormDataWorkflow(wcs_api, **self.workflow)
        self.evolution = [Evolution(wcs_api, **evo) for evo in self.evolution or []]
        self.functions = {}
        self.concerned_roles = []
        self.action_roles = []
        for function, raw_roles in (self.roles or {}).items():
            roles = [Role(wcs_api, **r) for r in raw_roles]
            if function == 'concerned':
                self.concerned_roles.extend(roles)
            elif function == 'actions':
                # These roles used to be appended to concerned_roles, which
                # left action_roles always empty.
                self.action_roles.extend(roles)
            else:
                # Any other key is a workflow function: keep its first role.
                self.functions[function] = roles[0] if roles else None
        # The raw mapping has been split into the attributes above.
        if 'roles' in self.__dict__:
            del self.roles
        self.with_files = False

    def __repr__(self):
        return '<{klass} {display_id!r}>'.format(klass=self.__class__.__name__,
                                                 display_id=self.id)

    @property
    def full_fields(self):
        """Field values of the complete form.

        The individual form is requested on first access and its ``fields``
        replace the ones received from the list.
        """
        if not self.with_files:
            data = self._wcs_api.get_json(
                self._wcs_api.forms_url,
                self.formdef_slug + '/',
                str(self.id) + '/')
            self.fields = data['fields']
            self.with_files = True
        return self.fields

    @property
    def endpoint_delay(self):
        """Delay between receipt and the start of the final endpoint run.

        The history is read from its most recent entry backwards. As long as
        statuses are endpoints, the delay of the oldest one seen so far is
        kept; the first non-endpoint status ends the walk. Returns ``None``
        when the latest status is not an endpoint, when a status is unknown
        to the workflow, or when no non-endpoint status is met.

        Requires ``self.formdef`` to be set (``FormDef.datas`` does it).
        """
        statuses_map = self.formdef.schema.workflow.statuses_map
        last = None
        for evo in reversed(self.evolution):
            if not evo.status:
                continue
            try:
                status = statuses_map[evo.status]
            except KeyError:  # happens when the workflow has changed
                return None
            if not status.endpoint:
                # ``last`` is still None when no endpoint status came after
                # this one.
                return last
            last = evo.time - self.receipt_time
        return None

    def __getitem__(self, key):
        """Return the value of field *key*; serialized files become JSONFile."""
        value = self.full_fields.get(key)
        if not value:
            return value
        # unserialize files
        if isinstance(value, dict) and 'content' in value:
            return JSONFile(value)
        return value
class Workflow(BaseObject):
    """Workflow description found in a form definition schema.

    ``statuses`` become objects carrying a ``startpoint`` flag (true for the
    first status only), ``statuses_map`` indexes them by id and ``fields``
    become ``Field`` objects.
    """
    statuses = None
    fields = None

    def __init__(self, wcs_api, **kwargs):
        super(Workflow, self).__init__(wcs_api, **kwargs)
        self.statuses = [BaseObject(wcs_api, **v) for v in (self.statuses or [])]
        # A workflow without statuses is accepted by the line above, so do
        # not index into an empty list.
        if self.statuses:
            assert not hasattr(self.statuses[0], 'startpoint'), 'startpoint is exported by w.c.s. FIXME'
            for status in self.statuses:
                status.startpoint = False
            self.statuses[0].startpoint = True
        self.statuses_map = {s.id: s for s in self.statuses}
        self.fields = [Field(wcs_api, **field) for field in (self.fields or [])]
class Field(BaseObject):
    """Field description of a schema or workflow, with defaults for optional keys."""
    items = options = varname = anonymise = None
    in_filters = False
class Schema(BaseObject):
    """Schema of a form definition.

    The ``workflow`` and ``fields`` keywords are required; ``geolocations``
    is turned into a sorted list of ``(key, value)`` pairs.
    """
    category_id = None
    category = None
    geolocations = None

    def __init__(self, wcs_api, **kwargs):
        super().__init__(wcs_api, **kwargs)
        self.workflow = Workflow(wcs_api, **self.workflow)
        wrapped_fields = []
        for raw_field in self.fields:
            wrapped_fields.append(Field(wcs_api, **raw_field))
        self.fields = wrapped_fields
        locations = self.geolocations or {}
        self.geolocations = sorted(locations.items())
class FormDef(BaseObject):
    """A form definition as listed by the formdefs endpoint."""
    geolocations = None

    def __init__(self, wcs_api, **kwargs):
        # Same behaviour as BaseObject; delegate instead of duplicating it.
        super(FormDef, self).__init__(wcs_api, **kwargs)

    def __str__(self):
        return self.title

    # Python 2 spelling, kept for callers that invoke it explicitly; Python 3
    # only ever calls __str__.
    __unicode__ = __str__

    @property
    def datas(self):
        """Iterate over this definition's form datas.

        Each item gets a ``formdef`` attribute pointing back to this object.
        """
        datas = self._wcs_api.get_formdatas(self.slug, full=False)
        for data in datas:
            data.formdef = self
            yield data

    @property
    def schema(self):
        """Schema of this form definition, requested through the API client."""
        return self._wcs_api.get_schema(self.slug)

    def __repr__(self):
        return '<{klass} {slug!r}>'.format(klass=self.__class__.__name__, slug=self.slug)
class Role(BaseObject):
    """A role; attributes are whatever the payload provides."""
class Category(BaseObject):
    """A form category; attributes are whatever the caller provides."""
class WcsApi(object):
    """Client for the w.c.s. JSON API using signed GET requests.

    :param url: base URL of the w.c.s. instance.
    :param orig: value sent as the ``orig`` query parameter.
    :param key: secret used to sign request URLs.
    :param name_id: optional value sent as the ``NameID`` query parameter.
    :param verify: passed to ``requests.get`` as its ``verify`` argument.
    :param slugs: optional list restricting ``formdefs`` to these slugs.
    :param batch_size: page size used by ``get_formdatas``.
    """

    def __init__(self, url, orig, key, name_id=None, verify=True, slugs=None, batch_size=1000):
        self.url = url
        self.orig = orig
        self.key = key
        self.verify = verify
        # Decoded responses, keyed by the URL before signing.
        self.cache = {}
        self.slugs = slugs or []
        self.batch_size = batch_size
        self.name_id = name_id

    @property
    def formdefs_url(self):
        return urlparse.urljoin(self.url, 'api/formdefs/')

    @property
    def forms_url(self):
        return urlparse.urljoin(self.url, 'api/forms/')

    @property
    def roles_url(self):
        return urlparse.urljoin(self.url, 'api/roles')

    def build_url(self, url_parts):
        """Join *url_parts* left to right with ``urljoin``."""
        url = url_parts[0]
        for url_part in url_parts[1:]:
            url = urlparse.urljoin(url, url_part)
        return url

    def get_json(self, *url_parts):
        """GET the URL built from *url_parts* and return the decoded JSON.

        ``orig`` (and ``NameID`` when configured) are added to the query
        string and the URL is signed before the request. Results are cached
        per unsigned URL for the lifetime of this object.

        :raises WcsApiError: when the request fails or the body is not JSON.
        """
        url = self.build_url(url_parts)
        params = {'orig': self.orig}
        if self.name_id:
            params['NameID'] = self.name_id
        query_string = urlparse.urlencode(params)
        presigned_url = url + ('&' if '?' in url else '?') + query_string
        if presigned_url in self.cache:
            return self.cache[presigned_url]
        signed_url = signature.sign_url(presigned_url, self.key)
        try:
            response = requests.get(signed_url, verify=self.verify)
            response.raise_for_status()
        except requests.RequestException as e:
            # Chain the cause so the original traceback is preserved.
            raise WcsApiError('GET request failed', signed_url, e) from e
        try:
            content = response.json()
        except ValueError as e:
            raise WcsApiError('Invalid JSON content', signed_url, e) from e
        self.cache[presigned_url] = content
        return content

    @property
    def roles(self):
        """Roles listed under ``data`` by the roles endpoint."""
        return [Role(wcs_api=self, **d) for d in self.get_json(self.roles_url)['data']]

    @property
    def formdefs(self):
        """Form definitions, restricted to ``self.slugs`` when it is set.

        The endpoint may answer with a bare list or with an ``err``/``data``
        envelope; an error envelope is logged and yields an empty list.
        """
        result = self.get_json(self.formdefs_url)
        if isinstance(result, dict):
            if result['err'] == 0:
                data = result['data']
            else:
                logger.error(u'could not retrieve formdefs from %s, err_desc: %s',
                             self.formdefs_url, result.get('err_desc'))
                return []
        else:
            data = result
        return [FormDef(wcs_api=self, **d) for d in data
                if not self.slugs or d['slug'] in self.slugs]

    @property
    def categories(self):
        """Distinct categories used by the form definitions."""
        found = {}
        for formdef in self.formdefs:
            schema = formdef.schema
            # Schema defines ``category = None`` at class level, so hasattr()
            # was always true and uncategorized forms produced a
            # Category(id=None, name=None). Test the value instead.
            if schema.category is not None:
                found[schema.category_id] = schema.category
        return [Category(wcs_api=self, id=k, name=v) for k, v in found.items()]

    def get_formdatas(self, slug, full=True):
        """Iterate over the form datas of form definition *slug*.

        The list endpoint is read page by page (``batch_size`` items per
        request) until a page comes back shorter than requested.
        """
        offset = 0
        limit = self.batch_size
        while True:
            data = self.get_json(
                self.forms_url,
                slug + '/list?anonymise&full=%s&offset=%d&limit=%d' % (
                    'on' if full else 'off', offset, limit))
            for d in data:
                # w.c.s. had a bug where some formdata lost their draft status, skip them
                if not d.get('receipt_time'):
                    continue
                yield FormData(wcs_api=self, formdef_slug=slug, **d)
            if len(data) < limit:
                break
            offset += limit

    def get_schema(self, slug):
        """Fetch and wrap the schema of form definition *slug*."""
        json_schema = self.get_json(self.formdefs_url, slug + '/', 'schema?anonymise')
        return Schema(wcs_api=self, **json_schema)

98
cd06/script.py Normal file
View File

@ -0,0 +1,98 @@
# -*- coding: utf-8 -*-
import sys
import os
import shutil
from cd06.wcs_api import WcsApi
from config import WCS_URL, WCS_ORIG, WCS_APIKEY, USER_UUID
# API client configured from config.py; USER_UUID is the name_id the client
# sends along with each request.
api = WcsApi(url=WCS_URL, orig=WCS_ORIG, key=WCS_APIKEY, name_id=USER_UUID)

# Root directory of the export, given as first command-line argument.
target_dir = sys.argv[1]
# Forms to export, keyed by form definition slug.
#   DIRECTORY: sub-directory of the target directory receiving the form's files.
#   MAPPINGS:  (label, field variable, file name) triples, one per attachment.
#              The label is only printed; the file name supplies the base
#              name of the written file, whose extension is taken from the
#              uploaded file.
FORMDEFS = {
    'premiere-demande-d-apa-a-domicile': {
        'DIRECTORY': 'premiere-demande-apa',
        'MAPPINGS': [
            # ('Name', 'Variable', 'File naming',)
            ('Mandat de délégation signé', 'mandat', 'MANDAT_DELEGATION.PDF',),
            ('Copie du jugement mesure de protection par une personne',
             'jugement_mesure_protection_personne',
             'JUGEMENT_MESURE_DE_PROTECTION_PERSONNE.PDF',),
            ('Copie du jugement mesure de protection par un organisme personne',
             'jugement_mesure_protection_organisme',
             'JUGEMENT_MESURE_DE_PROTECTION_ORGANISME.PDF',),
            ('Justificatif d\'identité demandeur', 'piece_identite_demandeur', 'JUSTIF_IDENT_DEMANDEUR.PDF',),
            ('RIB demandeur', 'rib_demandeur', 'RIB_RIP_DEMANDEUR.PDF',),
            ('Taxe foncière 1er bien', 'taxe_fonciere_1', 'TAXE_FONCIERE_BIEN_1.PDF',),
            ('Taxe foncière 2ème bien', 'taxe_fonciere_2', 'TAXE_FONCIERE_BIEN_2.PDF',),
            ('Taxe foncière 3ème bien', 'taxe_fonciere_3', 'TAXE_FONCIERE_BIEN_3.PDF',),
            ('Avis d\'imposition', 'avis_imposition', 'AVIS_IMPOSITION.PDF',),
            ('Document supplémentaire 1', 'document_supplementaire_1', 'DOCUMENT_SUPPLEMENTAIRE_1.PDF',),
            ('Document supplémentaire 2', 'document_supplementaire_2', 'DOCUMENT_SUPPLEMENTAIRE_2.PDF',),
            ('Document supplémentaire 3', 'document_supplementaire_3', 'DOCUMENT_SUPPLEMENTAIRE_3.PDF',),
        ]
    },
    'aggravation-revision-apa': {
        'DIRECTORY': 'aggravation-apa',
        'MAPPINGS': [
            # ('Name', 'Variable', 'File naming',)
            ('Mandat de délégation signé', 'mandat', 'MANDAT_DELEGATION.PDF',),
            ('Copie du jugement mesure de protection par une personne',
             'jugement_mesure_protection_personne',
             'JUGEMENT_MESURE_DE_PROTECTION_PERSONNE.PDF',),
            ('Copie du jugement mesure de protection par un organisme personne',
             'jugement_mesure_protection_organisme',
             'JUGEMENT_MESURE_DE_PROTECTION_ORGANISME.PDF',),
            ('Taxe foncière 1er bien', 'taxe_fonciere_1', 'TAXE_FONCIERE_BIEN_1.PDF',),
            ('Taxe foncière 2ème bien', 'taxe_fonciere_2', 'TAXE_FONCIERE_BIEN_2.PDF',),
            ('Taxe foncière 3ème bien', 'taxe_fonciere_3', 'TAXE_FONCIERE_BIEN_3.PDF',),
            ('Avis d\'imposition', 'avis_imposition', 'AVIS_IMPOSITION.PDF',),
        ]
    }
}
print('Moving files into', target_dir)

# Attachments of a form are first written to a scratch directory under
# <target>/tmp and only renamed to their final location once all of them
# are there, so a final directory is never partially filled.
tmp_dir = os.path.join(target_dir, 'tmp')
os.makedirs(tmp_dir, exist_ok=True)

for formdef in api.formdefs:
    if formdef.slug not in FORMDEFS:
        continue
    print(formdef.title)
    MAPPINGS = FORMDEFS[formdef.slug]['MAPPINGS']
    DIRECTORY = FORMDEFS[formdef.slug]['DIRECTORY']
    formdef_dir = os.path.join(target_dir, DIRECTORY)
    os.makedirs(formdef_dir, exist_ok=True)

    for formdata in formdef.datas:
        formdata_dir = os.path.join(formdef_dir, str(formdata.id))
        formdata_tmp_dir = os.path.join(tmp_dir, DIRECTORY, str(formdata.id))
        # An existing final directory means the form was already exported.
        if os.path.exists(formdata_dir):
            continue
        try:
            # Start from a clean scratch directory (errors are ignored when
            # there is nothing to remove).
            shutil.rmtree(formdata_tmp_dir, True)
            os.makedirs(formdata_tmp_dir)
            print(' Form', formdata.id)
            for name, key, filename in MAPPINGS:
                value = formdata[key]
                if not value:
                    continue
                print(' -', name, ':', value.filename)
                # Base name from the mapping, extension from the uploaded file.
                prefix = os.path.splitext(filename)[0]
                new_filename = prefix + os.path.splitext(value.filename)[1]
                attachment_path = os.path.join(formdata_tmp_dir, new_filename)
                print('Putting in', attachment_path)
                with open(attachment_path, 'wb') as f:
                    f.write(value.content)
            os.rename(formdata_tmp_dir, formdata_dir)
        finally:
            # No-op after a successful rename; removes leftovers on failure.
            shutil.rmtree(formdata_tmp_dir, True)
    # NOTE(review): nesting level of this blank-line print is assumed to be
    # "once per form definition" - confirm.
    print()