passerelle/passerelle/utils/wcs.py

725 lines
23 KiB
Python

# passerelle - uniform access to multiple data sources and services
# Copyright (C) 2019 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import absolute_import
import base64
import collections
import contextlib
import copy
import datetime
import json
import logging
from urllib import parse as urlparse
import isodate
import requests
from django import forms
from django.conf import settings
from django.core.cache import cache
from django.db import models
from passerelle.base import signature
class WcsApiError(Exception):
pass
class JSONFile:
def __init__(self, d):
self.d = d
@property
def filename(self):
return self.d.get('filename', '')
@property
def content_type(self):
return self.d.get('content_type', 'application/octet-stream')
@property
def content(self):
return base64.b64decode(self.d['content'])
def to_dict(o):
if hasattr(o, 'to_dict'):
return o.to_dict()
elif isinstance(o, dict):
return {k: to_dict(v) for k, v in o.items()}
elif isinstance(o, (list, tuple)):
return [to_dict(v) for v in o]
else:
return o
class BaseObject:
def __init__(self, wcs_api, **kwargs):
self._wcs_api = wcs_api
self.__dict__.update(**kwargs)
def to_dict(self):
d = collections.OrderedDict()
for key, value in self.__dict__.items():
if key[0] == '_':
continue
d[key] = to_dict(value)
return d
class FormDataWorkflow(BaseObject):
status = None
fields = None
def __init__(self, wcs_api, **kwargs):
super().__init__(wcs_api, **kwargs)
if self.status is not None:
self.status = BaseObject(wcs_api, **self.status)
self.fields = self.fields or {}
class EvolutionUser(BaseObject):
id = None
name = None
NameID = None
email = None
class Evolution(BaseObject):
who = None
status = None
parts = None
def __init__(self, wcs_api, **kwargs):
super().__init__(wcs_api, **kwargs)
self.time = isodate.parse_datetime(self.time)
if self.parts:
self.parts = [BaseObject(wcs_api, **part) for part in self.parts]
if self.who:
self.who = EvolutionUser(wcs_api, **self.who)
class FormData(BaseObject):
geolocations = None
evolution = None
submissions = None
workflow = None
roles = None
with_files = False
def __init__(self, wcs_api, forms, **kwargs):
self.forms = forms
super().__init__(wcs_api, **kwargs)
self.receipt_time = isodate.parse_datetime(self.receipt_time)
if self.submissions:
self.submission = BaseObject(wcs_api, **self.submission)
if self.workflow:
self.workflow = FormDataWorkflow(wcs_api, **self.workflow)
self.evolution = [Evolution(wcs_api, **evo) for evo in self.evolution or []]
self.functions = {}
self.concerned_roles = []
self.action_roles = []
for function in self.roles or []:
roles = [Role(wcs_api, **r) for r in self.roles[function]]
if function == 'concerned':
self.concerned_roles.extend(roles)
elif function == 'actions':
self.concerned_roles.extend(roles)
else:
try:
self.functions[function] = roles[0]
except IndexError:
self.functions[function] = None
if 'roles' in self.__dict__:
del self.roles
def __str__(self):
return f'{self.formdef} - {self.id}'
@property
def full(self):
if self.with_files:
return self
if not hasattr(self, '_full'):
self._full = self.forms[self.id]
return self._full
@property
def anonymized(self):
return self.forms.anonymized[self.id]
@property
def endpoint_delay(self):
"""Compute delay as the time when the last not endpoint status precedes an endpoint
status."""
statuses_map = self.formdef.schema.workflow.statuses_map
s = 0
for evo in self.evolution[::-1]:
if evo.status:
try:
status = statuses_map[evo.status]
except KeyError: # happen when workflow has changed
return
if status.endpoint:
s = 1
last = evo.time - self.receipt_time
else:
if s == 1:
return last
else:
return
def __getitem__(self, key):
value = self.full.fields.get(key)
# unserialize files
if isinstance(value, dict) and 'content' in value:
return JSONFile(value)
return value
class Workflow(BaseObject):
statuses = None
fields = None
def __init__(self, wcs_api, **kwargs):
super().__init__(wcs_api, **kwargs)
self.statuses = [BaseObject(wcs_api, **v) for v in (self.statuses or [])]
assert not hasattr(self.statuses[0], 'startpoint'), 'startpoint is exported by w.c.s. FIXME'
for status in self.statuses:
status.startpoint = False
self.statuses[0].startpoint = True
self.statuses_map = dict((s.id, s) for s in self.statuses)
self.fields = [Field(wcs_api, **field) for field in (self.fields or [])]
class Field(BaseObject):
items = None
options = None
varname = None
in_filters = False
anonymise = None
class Schema(BaseObject):
category_id = None
category = None
geolocations = None
def __init__(self, wcs_api, **kwargs):
super().__init__(wcs_api, **kwargs)
self.workflow = Workflow(wcs_api, **self.workflow)
self.fields = [Field(wcs_api, **f) for f in self.fields]
self.geolocations = sorted((k, v) for k, v in (self.geolocations or {}).items())
class FormDatas:
def __init__(self, wcs_api, formdef, full=False, anonymize=False, batch=1000):
self.wcs_api = wcs_api
self.formdef = formdef
self._full = full
self.anonymize = anonymize
self.batch = batch
def __getitem__(self, slice_or_id):
# get batch of forms
if isinstance(slice_or_id, slice):
def helper():
if slice_or_id.stop <= slice_or_id.start or slice_or_id.step:
raise ValueError('invalid slice %s' % slice_or_id)
offset = slice_or_id.start
limit = slice_or_id.stop - slice_or_id.start
url_parts = ['api/forms/{self.formdef.slug}/list'.format(self=self)]
query = {}
query['full'] = 'on' if self._full else 'off'
if offset:
query['offset'] = str(offset)
if limit:
query['limit'] = str(limit)
if self.anonymize:
query['anonymise'] = 'on'
url_parts.append('?%s' % urlparse.urlencode(query))
for d in self.wcs_api.get_json(*url_parts):
# w.c.s. had a bug where some formdata lost their draft status, skip them
if not d.get('receipt_time'):
continue
yield FormData(wcs_api=self.wcs_api, forms=self, formdef=self.formdef, **d)
return helper()
# or get one form
else:
url_parts = ['api/forms/{formdef.slug}/{id}/'.format(formdef=self.formdef, id=slice_or_id)]
if self.anonymize:
url_parts.append('?anonymise=true')
d = self.wcs_api.get_json(*url_parts)
return FormData(wcs_api=self.wcs_api, forms=self, formdef=self.formdef, with_files=True, **d)
@property
def full(self):
forms = copy.copy(self)
forms._full = True
return forms
@property
def anonymized(self):
forms = copy.copy(self)
forms.anonymize = True
return forms
def batched(self, batch):
forms = copy.copy(self)
forms.batch = batch
return forms
def __iter__(self):
start = 0
while True:
empty = True
for formdef in self[start : start + self.batch]:
empty = False
yield formdef
if empty:
break
start += self.batch
def __len__(self):
return len(list((o for o in self)))
class CancelSubmitError(Exception):
pass
class FormDefSubmit:
formdef = None
data = None
user_email = None
user_name_id = None
backoffice_submission = False
submission_channel = None
submission_context = None
draft = False
def __init__(self, wcs_api, formdef, **kwargs):
self.wcs_api = wcs_api
self.formdef = formdef
self.data = {}
self.__dict__.update(kwargs)
def payload(self):
d = {
'data': self.data.copy(),
}
if self.draft:
d.setdefault('meta', {})['draft'] = True
if self.backoffice_submission:
d.setdefault('meta', {})['backoffice-submission'] = True
if self.submission_context:
d['context'] = self.submission_context
if self.submission_channel:
d.setdefault('context', {})['channel'] = self.submission_channel
if self.user_email:
d.setdefault('user', {})['email'] = self.user_email
if self.user_name_id:
d.setdefault('user', {})['NameID'] = self.user_name_id
return d
def set(self, field, value, **kwargs):
if isinstance(field, Field):
varname = field.varname
if not varname:
raise ValueError('field has no varname, submit is impossible')
else:
varname = field
try:
field = [f for f in self.formdef.schema.fields if f.varname == varname][0]
except IndexError:
raise ValueError('no field for varname %s' % varname)
if value is None or value == {} or value == []:
self.data.pop(varname, None)
elif hasattr(self, '_set_type_%s' % field.type):
getattr(self, '_set_type_%s' % field.type)(varname=varname, field=field, value=value, **kwargs)
else:
self.data[varname] = value
def _set_type_item(self, varname, field, value, **kwargs):
if isinstance(value, dict):
if not set(value).issuperset(set(['id', 'text'])):
raise ValueError('item field value must have id and text value')
# clean previous values
self.data.pop(varname, None)
self.data.pop(varname + '_raw', None)
self.data.pop(varname + '_structured', None)
if isinstance(value, dict):
# structured & display values
self.data[varname + '_raw'] = value['id']
self.data[varname] = value['text']
if len(value) > 2:
self.data[varname + '_structured'] = value
else:
# raw id in varname
self.data[varname] = value
def _set_type_items(self, varname, field, value, **kwargs):
if not isinstance(value, list):
raise TypeError('%s is an ItemsField it needs a list as value' % varname)
has_dict = False
for choice in value:
if isinstance(value, dict):
if not set(value).issuperset(set(['id', 'text'])):
raise ValueError('items field values must have id and text value')
has_dict = True
if has_dict:
if not all(isinstance(choice, dict) for choice in value):
raise ValueError('ItemsField value must be all structured or none')
# clean previous values
self.data.pop(varname, None)
self.data.pop(varname + '_raw', None)
self.data.pop(varname + '_structured', None)
if has_dict:
raw = self.data[varname + '_raw'] = []
display = self.data[varname] = []
structured = self.data[varname + '_structured'] = []
for choice in value:
raw.append(choice['id'])
display.append(choice['text'])
structured.append(choice)
else:
self.data[varname] = value[:]
def _set_type_file(self, varname, field, value, **kwargs):
filename = kwargs.get('filename')
content_type = kwargs.get('content_type', 'application/octet-stream')
if hasattr(value, 'read'):
content = base64.b64encode(value.read())
elif isinstance(value, bytes):
content = base64.b64encode(value)
elif isinstance(value, dict):
if not set(value).issuperset(set(['filename', 'content'])):
raise ValueError('file field needs a dict value with filename and content')
content = value['content']
filename = value['filename']
content_type = value.get('content_type', content_type)
if not filename:
raise ValueError('missing filename')
self.data[varname] = {
'filename': filename,
'content': content,
'content_type': content_type,
}
def _set_type_date(self, varname, field, value):
if isinstance(value, str):
value = datetime.datetime.strptime(value, '%Y-%m-%d').date()
if isinstance(value, datetime.datetime):
value = value.date()
if isinstance(value, datetime.date):
value = value.strftime('%Y-%m-%d')
self.data[varname] = value
def _set_type_map(self, varname, field, value):
if not isinstance(value, dict):
raise TypeError('value must be a dict for a map field')
if set(value) != set(['lat', 'lon']):
raise ValueError('map field expect keys lat and lon')
self.data[varname] = value
def _set_type_bool(self, varname, field, value):
if isinstance(value, str):
value = value.lower().strip() in ['yes', 'true', 'on']
if not isinstance(value, bool):
raise TypeError('value must be a boolean or a string true, yes, on, false, no, off')
self.data[varname] = value
def cancel(self):
raise CancelSubmitError
class FormDef(BaseObject):
geolocations = None
def __init__(self, wcs_api, **kwargs):
self._wcs_api = wcs_api
self.__dict__.update(**kwargs)
def __str__(self):
return self.title
@property
def formdatas(self):
return FormDatas(wcs_api=self._wcs_api, formdef=self)
@property
def schema(self):
if not hasattr(self, '_schema'):
d = self._wcs_api.get_json(f'api/formdefs/{self.slug}/schema')
self._schema = Schema(self._wcs_api, **d)
return self._schema
@contextlib.contextmanager
def submit(self, **kwargs):
submitter = FormDefSubmit(wcs_api=self._wcs_api, formdef=self, **kwargs)
try:
yield submitter
except CancelSubmitError:
return
payload = submitter.payload()
d = self._wcs_api.post_json(payload, f'api/formdefs/{self.slug}/submit')
if d['err'] != 0:
raise WcsApiError('submited returned an error: %s' % d)
submitter.result = BaseObject(self._wcs_api, **d['data'])
class Role(BaseObject):
pass
class Category(BaseObject):
pass
class WcsObjects:
url = None
object_class = None
def __init__(self, wcs_api):
self.wcs_api = wcs_api
def __getitem__(self, slug):
# pylint: disable=isinstance-second-argument-not-valid-type
if isinstance(slug, self.object_class):
slug = slug.slug
for instance in self:
if instance.slug == slug:
return instance
raise KeyError('no instance with slug %r' % slug)
def __iter__(self):
for d in self.wcs_api.get_json(self.url)['data']:
# pylint: disable=not-callable
yield self.object_class(wcs_api=self.wcs_api, **d)
def __len__(self):
return len(list((o for o in self)))
class Roles(WcsObjects):
# Paths are not coherent :/
url = 'api/roles'
object_class = Role
class FormDefs(WcsObjects):
url = 'api/formdefs/'
object_class = FormDef
class Categories(WcsObjects):
url = 'api/categories/'
object_class = Category
class WcsApi:
def __init__(
self, url, email=None, name_id=None, batch_size=1000, session=None, logger=None, orig=None, key=None
):
self.url = url
self.batch_size = batch_size
self.email = email
self.name_id = name_id
self.requests = session or requests.Session()
self.logger = logger or logging.getLogger(__name__)
self.orig = orig
self.key = key
def _build_url(self, url_parts):
url = self.url
for url_part in url_parts:
url = urlparse.urljoin(url, url_part)
return url
def get_json(self, *url_parts):
url = self._build_url(url_parts)
params = {}
if self.email:
params['email'] = self.email
if self.name_id:
params['NameID'] = self.name_id
if self.orig:
params['orig'] = self.orig
query_string = urlparse.urlencode(params)
complete_url = url + ('&' if '?' in url else '?') + query_string
final_url = complete_url
if self.key:
final_url = signature.sign_url(final_url, self.key)
try:
response = self.requests.get(final_url)
response.raise_for_status()
except requests.RequestException as e:
content = getattr(getattr(e, 'response', None), 'content', None)
raise WcsApiError('GET request failed', final_url, e, content)
else:
try:
return response.json()
except ValueError as e:
raise WcsApiError('Invalid JSON content', final_url, e)
def post_json(self, data, *url_parts, headers=None):
headers = headers or {'content-type': 'application/json'}
url = self._build_url(url_parts)
params = {}
if self.email:
params['email'] = self.email
if self.name_id:
params['NameID'] = self.name_id
if self.orig:
params['orig'] = self.orig
query_string = urlparse.urlencode(params)
complete_url = url + ('&' if '?' in url else '?') + query_string
final_url = complete_url
if self.key:
final_url = signature.sign_url(final_url, self.key)
try:
response = self.requests.post(final_url, data=json.dumps(data), headers=headers)
response.raise_for_status()
except requests.RequestException as e:
content = getattr(getattr(e, 'response', None), 'content', None)
raise WcsApiError('POST request failed', final_url, e, content)
else:
try:
return response.json()
except ValueError as e:
raise WcsApiError('Invalid JSON content', final_url, e)
@property
def roles(self):
return Roles(self)
@property
def formdefs(self):
return FormDefs(self)
@property
def categories(self):
return Categories(self)
def get_wcs_choices(session=None):
cached_choices = cache.get('wcs-formdef-choices')
if cached_choices is None:
known_services = getattr(settings, 'KNOWN_SERVICES', {})
def helper():
for key, value in known_services.get('wcs', {}).items():
api = WcsApi(url=value['url'], orig=value['orig'], key=value['secret'], session=session)
for formdef in list(api.formdefs):
title = '%s - %s' % (value['title'], formdef.title)
yield key, formdef.slug, title
cached_choices = sorted(helper(), key=lambda x: x[2])
cache.set('wcs-formdef-choices', cached_choices, 600)
choices = [('', '---------')]
for wcs_slug, formdef_slug, title in cached_choices:
choices.append((FormDefRef(wcs_slug, formdef_slug), title))
return choices
class FormDefRef:
_formdef = None
_api = None
session = None
def __init__(self, value1, value2=None):
if value2:
self.wcs_slug, self.formdef_slug = value1, value2
else:
self.wcs_slug, self.formdef_slug = str(value1).rsplit(':', 1)
@property
def api(self):
if not self._api:
config = settings.KNOWN_SERVICES['wcs'].get(self.wcs_slug)
self._api = WcsApi(
url=config['url'], orig=config['orig'], key=config['secret'], session=self.session
)
return self._api
@property
def formdef(self):
if not self._formdef:
self._formdef = self.api.formdefs[self.formdef_slug]
return self._formdef
def __getattr__(self, name):
return getattr(self.formdef, name)
def __str__(self):
return '%s:%s' % (self.wcs_slug, self.formdef_slug)
def __eq__(self, other):
if not other:
return False
if not hasattr(other, 'wcs_slug'):
other = FormDefRef(other)
return self.wcs_slug == other.wcs_slug and self.formdef_slug == other.formdef_slug
def __ne__(self, other):
return not self.__eq__(other)
def __deepcopy__(self, memo):
return self.__class__(self.wcs_slug, self.formdef_slug)
class FormDefFormField(forms.TypedChoiceField):
def __init__(self, **kwargs):
super().__init__(choices=self.get_formdef_choices, coerce=FormDefRef, **kwargs)
def get_formdef_choices(self):
requests = getattr(self, 'requests', None)
return get_wcs_choices(requests)
class FormDefField(models.Field):
def get_internal_type(self):
return 'TextField'
def from_db_value(self, value, *args, **kwargs):
return self.to_python(value)
def to_python(self, value):
if not value:
return None
if isinstance(value, FormDefRef):
return value
return FormDefRef(value)
def get_prep_value(self, value):
if not value:
return ''
return str(value)
def formfield(self, **kwargs):
defaults = {
'form_class': FormDefFormField,
}
defaults.update(kwargs)
return super().formfield(**defaults)