# passerelle - uniform access to multiple data sources and services
# Copyright (C) 2019 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

'''Gateway to API-Entreprise web-service from SGMAP:
https://entreprise.api.gouv.fr
'''

from six.moves.urllib_parse import urljoin

import requests

from django.db import models
from django.utils.translation import ugettext_lazy as _
from django.utils.timezone import datetime, make_aware, timedelta
from django.http import HttpResponse, Http404
from django.core import signing
from django.urls import reverse

from passerelle.base.models import BaseResource
from passerelle.utils.api import endpoint
from passerelle.utils.jsonresponse import APIError, exception_to_text
from passerelle.views import WrongParameter

# maximum age of the signed document links built by documents_associations
DOCUMENT_SIGNATURE_MAX_AGE = timedelta(days=7)

# exceptions raised when a value cannot be converted to a date/datetime;
# datetime.fromtimestamp() raises OverflowError or OSError (not only
# ValueError) when the timestamp is out of the platform's range
_CONVERSION_ERRORS = (ValueError, TypeError, OverflowError, OSError)

# query-string spellings accepted as "true" for boolean parameters
_TRUE_VALUES = ('1', 'true', 'on', 'yes')


def _is_true(value):
    '''Interpret a query-string value as a boolean.

    Strings are compared case-insensitively against _TRUE_VALUES, so that
    "false" or "0" do not count as true; any other value uses its own
    truthiness.
    '''
    if isinstance(value, str):
        return value.strip().lower() in _TRUE_VALUES
    return bool(value)


def _check_siren(siren):
    '''Raise APIError unless siren is exactly 9 characters long.'''
    if len(siren) != 9:
        raise APIError(_('invalid SIREN length (must be 9 characters)'))


def normalize_results(data):
    '''Normalize, in place, a decoded API-Entreprise response.

    Walks the dict `data` recursively (nested dicts, and dicts found in
    lists) and applies these rewrites:

    * a non-empty list whose first element is a string is replaced by that
      first element;
    * an integer stored under a key starting with "date" (and not ending
      with "timestamp") is replaced by the matching date;
    * for every "<prefix>timestamp" key holding a positive integer (or a
      string convertible to one), a "<prefix>datetime" key holding the
      matching aware datetime is added; the timestamp itself is kept.

    Values that cannot be converted are left untouched. Returns None.
    '''
    # new keys are collected here and merged after the loop: a dict must not
    # change size while it is being iterated
    timestamp_to_datetime = {}
    for key in data:
        if isinstance(data[key], dict):
            normalize_results(data[key])
        if isinstance(data[key], list):
            if data[key] and isinstance(data[key][0], str):
                # keep only the first value
                data[key] = data[key][0]
            else:
                for item in data[key]:
                    if isinstance(item, dict):
                        normalize_results(item)
        if key.startswith('date') and not key.endswith('timestamp'):
            if isinstance(data[key], int):
                try:
                    data[key] = datetime.fromtimestamp(int(data[key])).date()
                except _CONVERSION_ERRORS:
                    pass
        if key.endswith('timestamp'):
            # timestamps can be integers or strings or null
            # convert only if it's a positive integer
            try:
                tstamp = int(data[key])
            except _CONVERSION_ERRORS:
                continue
            if tstamp <= 0:
                continue
            try:
                aware_date = make_aware(datetime.fromtimestamp(tstamp))
            except _CONVERSION_ERRORS:
                continue
            timestamp_to_datetime[key[: -len('timestamp')] + 'datetime'] = aware_date
    # add converted timestamps to initial data
    data.update(timestamp_to_datetime)


class APIEntreprise(BaseResource):
    '''Connector exposing API-Entreprise resources as passerelle endpoints.'''

    log_requests_errors = False

    url = models.URLField(_('API URL'), max_length=256, default='https://entreprise.api.gouv.fr/v2/')
    token = models.CharField(max_length=1024, verbose_name=_('API token'))
    recipient = models.CharField(
        max_length=1024, verbose_name=_('Recipient'), blank=False, help_text=_('default value')
    )

    category = _('Business Process Connectors')

    class Meta:
        verbose_name = _('API Entreprise')

    def get(self, path, **kwargs):
        '''Query the remote API and return a normalized payload.

        `path` is joined to the configured API URL. `kwargs` must contain
        non-empty "context" and "object" values; "recipient" defaults to the
        connector's own recipient and a truthy "non_diffusables" is forwarded
        as 'true'.

        Returns {'err': 0, 'data': ...} on success, or
        {'err': 1, 'err_desc': ...} when the API answers with a "not_found"
        error.

        Raises WrongParameter when "context" or "object" is missing, and
        APIError on connection error, non-JSON content, or any other
        non-200 answer.
        '''
        params = {'token': self.token}
        for param in ('context', 'object'):
            if not kwargs.get(param):
                raise WrongParameter([param], [])
            params[param] = kwargs[param]
        params['recipient'] = kwargs.get('recipient') or self.recipient
        if kwargs.get('non_diffusables'):
            params['non_diffusables'] = 'true'
        url = urljoin(self.url, path)
        try:
            # NOTE(review): parameters are passed as `data=` although this is
            # a GET request; kept as is -- confirm how self.requests handles it
            response = self.requests.get(url, data=params, cache_duration=300)
        except requests.RequestException as e:
            raise APIError(u'API-entreprise connection error: %s' % exception_to_text(e), data=[])
        try:
            data = response.json()
        except ValueError as e:
            content = response.text[:1000]
            raise APIError(
                u'API-entreprise returned non-JSON content with status %s: %s'
                % (response.status_code, content),
                data={
                    'status_code': response.status_code,
                    'exception': exception_to_text(e),
                    'content': content,
                },
            )
        if response.status_code != 200:
            # the error body is not guaranteed to be a JSON object
            if isinstance(data, dict) and data.get('error') == 'not_found':
                return {
                    'err': 1,
                    'err_desc': data.get('message', 'not-found'),
                }
            raise APIError(
                u'API-entreprise returned a non 200 status %s: %s' % (response.status_code, data),
                data={
                    'status_code': response.status_code,
                    'content': data,
                },
            )
        normalize_results(data)
        return {
            'err': 0,
            'data': data,
        }

    # description of common endpoint parameters
    ASSOCIATION_PARAM = {
        'description': _('association SIREN or RNA/WALDEC number'),
        'example_value': '44317013900036',
    }

    CONTEXT_PARAM = {'description': _('request context: MPS, APS...'), 'example_value': 'APS'}

    MONTH_PARAM = {
        'description': _('requested month'),
        'example_value': '02',
    }

    OBJECT_PARAM = {
        'description': _('request object: form number, file identifier...'),
        'example_value': '42',
    }

    RECIPIENT_PARAM = {
        'description': _('request recipient: usually customer number'),
        'example_value': '44317013900036',
    }

    SIREN_PARAM = {
        'description': _('firm SIREN number'),
        'example_value': '443170139',
    }

    SIRET_PARAM = {'description': _('firms SIRET number'), 'example_value': '44317013900036'}

    YEAR_PARAM = {
        'description': _('requested year'),
        'example_value': '2019',
    }

    @endpoint(
        perm='can_access',
        pattern=r'(?P<association_id>\w+)/$',
        example_pattern='{association_id}/',
        description=_('Get association\'s documents'),
        parameters={
            'association_id': ASSOCIATION_PARAM,
            'object': OBJECT_PARAM,
            'context': CONTEXT_PARAM,
            'recipient': RECIPIENT_PARAM,
        },
    )
    def documents_associations(self, request, association_id, **kwargs):
        '''List the documents of an association, sorted by timestamp.

        Each returned document gets "id" (its timestamp), "text" (its type)
        and a "url" rewritten to point at this connector's `document`
        endpoint, with the original URL carried in a signed token.
        Documents without a type are skipped. When the API reports the
        association as not found, the error payload from get() is returned
        unchanged.
        '''
        resp = self.get('documents_associations/%s/' % association_id, **kwargs)
        if resp.get('err'):
            # not-found payloads carry no 'data' key
            return resp
        # same fallback as get(): "recipient" is optional in the query string
        recipient = kwargs.get('recipient') or self.recipient
        data = []
        for item in resp['data'].get('documents', []):
            # ignore documents with no type
            if not item.get('type'):
                continue
            signature_elements = {
                'url': item['url'],
                'context': kwargs['context'],
                'object': kwargs['object'],
                'recipient': recipient,
            }
            signature = signing.dumps(signature_elements)
            document_url = request.build_absolute_uri(
                reverse(
                    'generic-endpoint',
                    kwargs={
                        'connector': self.get_connector_slug(),
                        'slug': self.slug,
                        'endpoint': 'document',
                        'rest': '%s/%s/' % (association_id, signature),
                    },
                )
            )
            item['id'] = item['timestamp']
            item['text'] = item['type']
            item['url'] = document_url
            data.append(item)
        # sort data by date
        data.sort(key=lambda i: i['id'])
        return {'err': 0, 'data': data}

    @endpoint(
        pattern=r'(?P<association_id>\w+)/(?P<document_id>[\:\w-]+)/$',
        example_pattern='{association_id}/{document_id}/',
        description=_('Get association\'s document'),
        parameters={
            'association_id': ASSOCIATION_PARAM,
            'document_id': {
                'description': _('document id'),
                'example_value': 'A1500660325',
            },
            'object': OBJECT_PARAM,
            'context': CONTEXT_PARAM,
            'recipient': RECIPIENT_PARAM,
        },
    )
    def document(self, request, association_id, document_id, **kwargs):
        '''Serve the document designated by a signed `document_id`.

        `document_id` is the signed token built by documents_associations;
        it is rejected once older than DOCUMENT_SIGNATURE_MAX_AGE. No `perm`
        is declared on this endpoint: the signature check is the only
        validation performed here.

        Raises Http404 when the signature is invalid or expired, or when the
        remote fetch does not succeed.
        '''
        try:
            params = signing.loads(document_id, max_age=DOCUMENT_SIGNATURE_MAX_AGE)
        except signing.BadSignature:
            raise Http404('document not found')
        response = self.requests.get(params['url'])
        if response.ok:
            return HttpResponse(response, content_type='application/pdf')
        raise Http404('document not found')

    # NOTE(review): unlike the other data endpoints, no perm='can_access' is
    # declared here -- confirm this is intended
    @endpoint(
        name='document_association',
        pattern=r'(?P<association_id>\w+)/get-last/$',
        example_pattern='{association_id}/get-last/',
        description=_('Get association\'s last document of type'),
        parameters={
            'association_id': ASSOCIATION_PARAM,
            'document_type': {
                'description': _('document type'),
                'example_value': 'Statuts',
            },
            'object': OBJECT_PARAM,
            'context': CONTEXT_PARAM,
            'recipient': RECIPIENT_PARAM,
        },
    )
    def get_last_document_of_type(self, request, association_id, document_type, **kwargs):
        '''Return the document of type `document_type` with the highest timestamp.

        Returns {'data': None} when the association has no document of that
        type. When the API reports the association as not found, the error
        payload from get() is returned unchanged.
        '''
        resp = self.get('documents_associations/%s/' % association_id, **kwargs)
        if resp.get('err'):
            # not-found payloads carry no 'data' key
            return resp
        document = None
        documents = [item for item in resp['data'].get('documents', []) if item.get('type') == document_type]
        if documents:
            documents.sort(key=lambda doc: doc['timestamp'])
            document = documents[-1]
        return {'data': document}

    @endpoint(
        perm='can_access',
        pattern=r'(?P<siren>\w+)/$',
        example_pattern='{siren}/',
        description=_('Get firm\'s data from Infogreffe'),
        parameters={
            'siren': SIREN_PARAM,
            'object': OBJECT_PARAM,
            'context': CONTEXT_PARAM,
            'recipient': RECIPIENT_PARAM,
        },
    )
    def extraits_rcs(self, request, siren, **kwargs):
        '''Proxy the "extraits_rcs_infogreffe" resource for `siren`.'''
        return self.get('extraits_rcs_infogreffe/%s/' % siren, **kwargs)

    @endpoint(
        perm='can_access',
        pattern=r'(?P<association_id>\w+)/$',
        example_pattern='{association_id}/',
        description=_('Get association\'s related informations'),
        parameters={
            'association_id': ASSOCIATION_PARAM,
            'object': OBJECT_PARAM,
            'context': CONTEXT_PARAM,
            'recipient': RECIPIENT_PARAM,
        },
    )
    def associations(self, request, association_id, **kwargs):
        '''Proxy the "associations" resource for `association_id`.'''
        return self.get('associations/%s/' % association_id, **kwargs)

    @endpoint(
        perm='can_access',
        pattern=r'(?P<siren>\w+)/$',
        example_pattern='{siren}/',
        description=_('Get firm\'s related informations'),
        parameters={
            'siren': SIREN_PARAM,
            'object': OBJECT_PARAM,
            'context': CONTEXT_PARAM,
            'recipient': RECIPIENT_PARAM,
            'include_private': {'description': _('Include private informations'), 'example_value': 'true'},
        },
    )
    def entreprises(self, request, siren, include_private=False, **kwargs):
        '''Proxy the "entreprises" resource for `siren`.

        `include_private` asks the API for non-public records too. It is
        parsed as a boolean, so that "false" or "0" coming from the query
        string do not enable it.

        Raises APIError when `siren` is not 9 characters long.
        '''
        _check_siren(siren)
        if _is_true(include_private):
            kwargs['non_diffusables'] = True
        return self.get('entreprises/%s/' % siren, **kwargs)

    @endpoint(
        perm='can_access',
        methods=['get'],
        pattern=r'(?P<siret>\w+)/$',
        example_pattern='{siret}/',
        description_get=_('Get firms\'s related informations'),
        parameters={
            'siret': SIRET_PARAM,
            'object': OBJECT_PARAM,
            'context': CONTEXT_PARAM,
            'recipient': RECIPIENT_PARAM,
        },
    )
    def etablissements(self, request, siret, **kwargs):
        '''Proxy the "etablissements" resource for `siret`.'''
        return self.get('etablissements/%s/' % siret, **kwargs)

    @endpoint(
        perm='can_access',
        methods=['get'],
        pattern=r'(?P<siret>\w+)/$',
        example_pattern='{siret}/',
        description_get=_('Get firms\'s financial year informations'),
        parameters={
            'siret': SIRET_PARAM,
            'object': OBJECT_PARAM,
            'context': CONTEXT_PARAM,
            'recipient': RECIPIENT_PARAM,
        },
    )
    def exercices(self, request, siret, **kwargs):
        '''Proxy the "exercices" resource for `siret`.'''
        return self.get('exercices/%s/' % siret, **kwargs)

    @endpoint(
        perm='can_access',
        pattern=r'(?P<siren>\w+)/$',
        example_pattern='{siren}/',
        description=_('Get firm\'s annual workforce data'),
        parameters={
            'siren': SIREN_PARAM,
            'object': OBJECT_PARAM,
            'context': CONTEXT_PARAM,
            'recipient': RECIPIENT_PARAM,
        },
    )
    def effectifs_annuels_acoss_covid(self, request, siren, **kwargs):
        '''Proxy the "effectifs_annuels_acoss_covid" resource for `siren`.

        Raises APIError when `siren` is not 9 characters long.
        '''
        _check_siren(siren)
        return self.get('effectifs_annuels_acoss_covid/%s/' % siren, **kwargs)

    @endpoint(
        perm='can_access',
        pattern=r'(?P<year>\w+)/(?P<month>\w+)/(?P<siren>\w+)/$',
        description=_('Get firm\'s monthly workforce data, by SIREN'),
        parameters={
            'year': YEAR_PARAM,
            'month': MONTH_PARAM,
            'siren': SIREN_PARAM,
            'object': OBJECT_PARAM,
            'context': CONTEXT_PARAM,
            'recipient': RECIPIENT_PARAM,
        },
    )
    def entreprise_effectifs_mensuels_acoss_covid(self, request, year, month, siren, **kwargs):
        '''Proxy the monthly workforce resource of a firm, by SIREN.

        `month` is left-padded with zeros to two characters.

        Raises APIError when `siren` is not 9 characters long.
        '''
        _check_siren(siren)
        month = month.zfill(2)
        return self.get(
            'effectifs_mensuels_acoss_covid/%s/%s/entreprise/%s/' % (year, month, siren), **kwargs
        )

    @endpoint(
        perm='can_access',
        pattern=r'(?P<year>\w+)/(?P<month>\w+)/(?P<siret>\w+)/$',
        description=_('Get firm\'s monthly workforce data, by SIRET'),
        parameters={
            'year': YEAR_PARAM,
            'month': MONTH_PARAM,
            'siret': SIRET_PARAM,
            'object': OBJECT_PARAM,
            'context': CONTEXT_PARAM,
            'recipient': RECIPIENT_PARAM,
        },
    )
    def etablissement_effectifs_mensuels_acoss_covid(self, request, year, month, siret, **kwargs):
        '''Proxy the monthly workforce resource of an establishment, by SIRET.

        `month` is left-padded with zeros to two characters.
        '''
        month = month.zfill(2)
        return self.get(
            'effectifs_mensuels_acoss_covid/%s/%s/etablissement/%s/' % (year, month, siret), **kwargs
        )