376 lines
14 KiB
Python
376 lines
14 KiB
Python
# passerelle - uniform access to multiple data sources and services
|
|
# Copyright (C) 2022 Entr'ouvert
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify it
|
|
# under the terms of the GNU Affero General Public License as published
|
|
# by the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Affero General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
import base64
|
|
|
|
import requests
|
|
from django.core.files.base import ContentFile
|
|
from django.db import models, transaction
|
|
from django.db.models import JSONField, Q
|
|
from django.http import Http404, HttpResponse
|
|
from django.urls import reverse
|
|
from django.utils.timezone import localtime, now
|
|
from django.utils.translation import gettext_lazy as _
|
|
|
|
from passerelle.base.models import BaseResource, HTTPResource
|
|
from passerelle.utils.api import endpoint
|
|
from passerelle.utils.json import datasource_array_schema, datasource_schema, response_schema
|
|
|
|
|
|
class UpdateError(Exception):
    """Signal that refreshing local documents from the webservice failed."""
|
|
|
|
|
|
class Resource(BaseResource, HTTPResource):
    """Connector keeping a local copy of Foederis referentials and announces.

    Remote data is fetched by :meth:`hourly` and stored in the related
    ``documents`` rows; the endpoints below only read those local rows.
    """

    category = _('Business Process Connectors')

    url = models.URLField(_('Webservice Base URL'))
    api_key = models.CharField(_('API Key'), max_length=512)

    log_requests_errors = False

    class Meta:
        verbose_name = _('Foederis connector')
        verbose_name_plural = _('Foederis connectors')

    @property
    def referentiels_documents(self):
        """Queryset of the documents holding the referentials of REFERENTIELS."""
        # `dummy` (not `_`) so the gettext alias is never shadowed, as in the
        # other loops over REFERENTIELS in this class.
        return self.documents.filter(
            external_id__in=[document_id for document_id, dummy in self.REFERENTIELS]
        )

    @property
    def announces_documents(self):
        """Queryset of the documents whose external id starts with ``announce-``."""
        return self.documents.filter(external_id__startswith='announce-')

    @property
    def last_update_referentiels(self):
        """Smallest ``updated`` value among referential documents (None if there is none)."""
        return self.referentiels_documents.aggregate(last_update=models.Min('updated'))['last_update']

    @property
    def last_update_announces(self):
        """Largest ``updated`` value among announce documents (None if there is none)."""
        return self.announces_documents.aggregate(last_update=models.Max('updated'))['last_update']

    def http_request(self, method, path, **kwargs):
        """Call the webservice and return the ``results`` member of its JSON reply.

        ``path`` is appended as-is to ``self.url``; extra keyword arguments are
        forwarded to ``self.requests.request`` after the ``API-Key`` header has
        been added.

        Returns the ``results`` value of the reply, or ``[]`` when absent.

        Raises ``requests.RequestException`` when the HTTP status is an error,
        when the body is not a JSON object, or when its ``code`` member is
        not 200.
        """
        kwargs.setdefault('headers', {})['API-Key'] = self.api_key
        url = self.url + path
        response = self.requests.request(method, url, **kwargs)
        response.raise_for_status()
        try:
            data = response.json()
        except ValueError as e:
            raise requests.RequestException('content is not JSON') from e
        if not isinstance(data, dict):
            # A JSON array/scalar has no .get(); report it like any other bad
            # reply so that callers' RequestException handling applies.
            raise requests.RequestException('content is not a JSON object')
        if data.get('code') != 200:
            raise requests.RequestException('code field is not 200, message=%s' % data.get('message'))
        return data.get('results', [])

    def update_referentiel(self, document_id, path):
        """Refresh one referential document from ``data/<path>``.

        Nothing is written when the webservice returns no result, so a
        previously stored referential is kept.

        Raises ``UpdateError`` when the webservice call fails.
        """
        try:
            results = self.http_request('GET', f'data/{path}?viewIntegrationName=api_publik')
        except requests.RequestException as e:
            raise UpdateError(_('Service is unavailable')) from e

        if not results:
            return
        data = [{'id': r['name'], 'text': r['name']} for r in results]
        self.documents.update_or_create(defaults={'data': data}, external_id=document_id)

    ANNOUNCES_FIELDS = [
        # response_field, document_field
        ('id', 'id'),
        ('date_publication', 'date'),
        ('date_fin_publication', 'date_fin_publication'),
        ('collectivite', 'collectivite'),
        ('direction', 'direction'),
        ('intitule_annonce', 'intitule'),
        ('orientation_recrutement_dgrh', 'orientation_recrutement'),
        ('cadre_emploi_depuis_ddr', 'cadre_emploi'),
        ('type_emploi_ddr', 'type_emploi'),
        ('categorie_ddr', 'categorie'),
        ('filiere_ddr', 'filiere'),
        ('intitule_structure_pour_offre', 'intitule_direction'),
        ('contenu_du_chapeau', 'chapeau'),
        ('missions_activites', 'description'),
        ('profil_recherche', 'profil'),
        # NOTE(review): the remote field name below looks misspelled; it is a
        # key read from the webservice reply, confirm against the API before
        # changing it.
        ('informations_complmentaires', 'informations_complementaires'),
        ('reference_offre', 'reference_offre'),
    ]

    DEMANDE_DE_PERSONNEL_FIELDS = [
        # response_field, document_field
        ('missions', 'description'),
        ('profil_requis', 'profil'),
    ]

    ANNOUNCE_SCHEMA = {
        'type': 'object',
        'properties': {field: {'type': 'string'} for dummy, field in ANNOUNCES_FIELDS},
    }
    ANNOUNCE_SCHEMA['properties']['pdf_url'] = {
        'type': 'string',
        'description': _('Public URL of the PDF announce'),
    }

    # Key of the announce reply holding the list of linked
    # "demande_de_personnel" ids (only the first one is used).
    FIELD_ANNOUNCE_FKEY_DEMANDE_DE_PERSONNEL = 'R14848258'

    def update_announce(self, response_announce):
        """Create or update the local document of one announce.

        ``response_announce`` is one item of the ``data/annonce`` results. Its
        fields are renamed following ANNOUNCES_FIELDS, then completed with the
        DEMANDE_DE_PERSONNEL_FIELDS of the first linked "demande de personnel"
        and, when ``pdf_ddr`` is set, with the decoded PDF file.

        The stored document is left untouched when neither its data nor its
        text changed.

        Raises ``UpdateError`` when a webservice call fails.
        """
        document_data = {
            document_field: response_announce.get(response_field)
            for response_field, document_field in self.ANNOUNCES_FIELDS
        }
        file_content = None

        # retrieve HTML content fields
        # A missing or null foreign key is handled like an empty list: the
        # announce simply has no linked "demande de personnel".
        recrut_ids = response_announce.get(self.FIELD_ANNOUNCE_FKEY_DEMANDE_DE_PERSONNEL) or []
        if recrut_ids:
            recrut_id = recrut_ids[0]
            fields = ','.join(x[0] for x in self.DEMANDE_DE_PERSONNEL_FIELDS)
            params = {
                'filterName': 'id',
                'filterValue': recrut_id,
                'fieldList': fields,
                'viewIntegrationName': 'api_publik',
            }
            try:
                results = self.http_request('GET', 'data/demande_de_personnel', params=params)
            except requests.RequestException as e:
                raise UpdateError(_('Service is unavailable')) from e
            if results:
                for response_field, document_field in self.DEMANDE_DE_PERSONNEL_FIELDS:
                    if response_field in results[0]:
                        document_data[document_field] = results[0][response_field]

        document_data['id'] = announce_id = str(response_announce['id'])
        external_id = f'announce-{announce_id}'
        # NOTE(review): Document.text is declared with max_length=64 in this
        # file; confirm announce titles cannot exceed it.
        text = document_data['text'] = document_data['intitule']
        if response_announce.get('pdf_ddr'):
            path = f'data/annonce/{announce_id}/fields/pdf_ddr?viewIntegrationName=api_publik'
            try:
                results = self.http_request('GET', path)
            except requests.RequestException as e:
                raise UpdateError(_('Service is unavailable')) from e
            if results:
                file_content = base64.b64decode(results[0]['pdf_ddr']['fileData'])

        with transaction.atomic(savepoint=False):
            document, created = self.documents.get_or_create(external_id=external_id)
            if document.data == document_data and document.text == text:
                return
            document.data = document_data
            document.text = text
            # Always drop the previous file; it is replaced below or cleared.
            if document.pdf:
                document.pdf.delete(save=False)
            if file_content:
                document.pdf.save(f'annonce-{announce_id}.pdf', ContentFile(file_content), save=False)
            else:
                document.pdf = None
            document.save()
            if created:
                self.logger.info(_('Created announce %s') % announce_id)
            else:
                self.logger.info(_('Updated announce %s') % announce_id)

    def update_announces(self):
        """Synchronise every announce and remove the ones no longer returned.

        Raises ``UpdateError`` when a webservice call fails.
        """
        try:
            results = self.http_request('GET', 'data/annonce?viewIntegrationName=api_publik')
        except requests.RequestException as e:
            raise UpdateError(_('Service is unavailable')) from e
        announces = []
        for response_announce in results:
            self.update_announce(response_announce)
            announces.append('announce-%s' % response_announce['id'])
        # Delete instance by instance: a bulk QuerySet.delete() does not call
        # Document.delete(), which is where the stored PDF file is removed.
        for document in self.announces_documents.exclude(external_id__in=announces):
            document.delete()

    REFERENTIELS = [
        # document_id, path
        ('type_emploi', 'type_emploi'),
        ('categorie', 'categorie1'),
        ('filiere', 'Filiere'),
    ]

    def update_referentiels(self):
        """Refresh every referential of REFERENTIELS, then the announces."""
        for document_id, path in self.REFERENTIELS:
            self.update_referentiel(document_id, path)
        self.update_announces()

    def hourly(self):
        """Run a full update; a failed update is logged as a warning, not raised."""
        try:
            self.update_referentiels()
        except UpdateError as e:
            self.logger.warning(_('Update failed: %s') % e)
        else:
            self.logger.info(_('Referentials updated.'))

    @endpoint(
        description=_('Get data source'),
        long_description=_('Available datasources: %s')
        % ', '.join(document_id.replace('_', '-') for document_id, dummy in REFERENTIELS),
        name='ds',
        pattern=r'^(?P<name>[a-z_-]+)/$',
        example_pattern='{name}/',
        perm='can_access',
        parameters={
            'name': {'description': _('Data source name'), 'example_value': 'type-emploi'},
        },
        json_schema_response=datasource_schema(),
    )
    def datasource(self, request, name):
        """Return the stored content of one referential.

        ``name`` may use dashes or underscores. Raises ``Http404`` when it is
        not one of REFERENTIELS; returns an empty list with a null
        ``last_update`` when the referential has not been stored yet.
        """
        name = name.replace('-', '_')
        if not any(document_id == name for document_id, dummy in self.REFERENTIELS):
            raise Http404
        document = self.documents.filter(external_id=name).first()
        if not document:
            return {
                'err': 0,
                'data': [],
                'last_update': None,
            }
        return {
            'err': 0,
            'data': document.data,
            # Same output as '%F %T', spelled with portable directives.
            'last_update': localtime(document.updated).strftime('%Y-%m-%d %H:%M:%S'),
        }

    @endpoint(
        description=_('Retrieve announce\'s PDF'),
        long_description=_('Do not use directly, use the pdf_url field of announces instead.'),
        name='announce',
        pattern=r'^(?P<announce_id>[0-9]+)/pdf/$',
        example_pattern='{announce_id}/pdf/',
        parameters={
            'announce_id': {'description': _('Announce id'), 'example_value': '12345'},
        },
    )
    def announce_pdf(self, request, announce_id):
        """Serve the PDF stored for an announce.

        Raises ``Http404`` when the announce has no stored PDF.
        """
        # passerelle catch DoesNotExist and converts it to 404
        document = self.documents.get(external_id=f'announce-{announce_id}')
        if not document.pdf:
            # Announces are stored with an empty file field when the
            # webservice gave no PDF; opening it would raise ValueError.
            raise Http404
        with document.pdf.open() as fd:
            return HttpResponse(fd, content_type='application/pdf')

    @endpoint(
        description=_('List announces'),
        long_description=_(
            'List published announces. Use unpublished=1 parameter to see all announces. When using id to retrieve a specific announce, filters are ignored.'
        ),
        name='announce',
        perm='can_access',
        parameters={
            'q': {'description': _('Free text search')},
            'id': {'description': _('Get a specific announce')},
            'type_emploi': {'description': _('Filter by job type')},
            'categorie': {'description': _('Filter by job category')},
            'filiere': {'description': _('Filter by job sector')},
            'collectivite': {'description': _('Filter by collectivite')},
            'unpublished': {'description': _('Add unpublished announces to the list')},
        },
        json_schema_response=response_schema(
            {'type': 'array', 'items': ANNOUNCE_SCHEMA},
            toplevel_properties={
                'data_sources': {
                    'type': 'object',
                    'patternProperties': {
                        '': datasource_array_schema(),
                    },
                }
            },
        ),
    )
    def announce(
        self,
        request,
        q=None,
        id=None,
        type_emploi=None,
        categorie=None,
        filiere=None,
        collectivite=None,
        unpublished=None,
    ):
        """List stored announces, most recent publication date first.

        With ``id`` only that announce is returned and every other filter is
        ignored. Otherwise announces outside their publication window are
        hidden unless ``unpublished`` is ``1``, ``true`` or ``on``, and the
        remaining parameters narrow the list. Each item carries a ``pdf_url``;
        the stored referentials are returned under ``data_sources``.
        """
        unpublished = bool(unpublished and unpublished.lower() in ['1', 'true', 'on'])
        qs = self.announces_documents
        qs = qs.order_by('-data__date')
        if id:
            qs = qs.filter(external_id=f'announce-{id}')
        else:
            # Use the local calendar day (now() alone yields the UTC day),
            # consistently with the local time reported by the ds endpoint.
            today = localtime(now()).date().strftime('%Y-%m-%d')
            if not unpublished:
                qs = qs.filter(Q(data__date__isnull=True) | Q(data__date__lte=today))
                qs = qs.filter(
                    Q(data__date_fin_publication__isnull=True) | Q(data__date_fin_publication__gte=today)
                )
            if q:
                qs = qs.filter(data__intitule__icontains=q)
            if type_emploi:
                qs = qs.filter(data__type_emploi=type_emploi)
            if categorie:
                qs = qs.filter(data__categorie=categorie)
            if filiere:
                qs = qs.filter(data__filiere=filiere)
            if collectivite:
                qs = qs.filter(data__collectivite=collectivite)
        data_sources = {document.external_id: document.data for document in self.referentiels_documents}

        def pdf_url(request, document):
            # Absolute URL of the announce_pdf endpoint for this document.
            doc_id = document.external_id.split('-')[-1]
            return request.build_absolute_uri(
                reverse(
                    'generic-endpoint',
                    kwargs={
                        'connector': self.get_connector_slug(),
                        'slug': self.slug,
                        'endpoint': 'announce',
                        'rest': f'{doc_id}/pdf/',
                    },
                )
            )

        return {
            'err': 0,
            'data': [dict(document.data, pdf_url=pdf_url(request, document)) for document in qs],
            'data_sources': data_sources,
        }
|
|
|
|
|
|
def upload_to(instance, filename):
    """Build the storage path of a file: one directory per resource slug."""
    owner_slug = instance.resource.slug
    return 'toulouse_foederis/{}/{}'.format(owner_slug, filename)
|
|
|
|
|
|
class Document(models.Model):
    """One piece of data stored locally for a resource.

    Rows are reachable from a resource through its ``documents`` related
    manager; an optional PDF file can be attached to a row.
    """

    resource = models.ForeignKey(
        Resource,
        on_delete=models.CASCADE,
        related_name='documents',
        verbose_name=_('Resource'),
    )
    # Lookup key of the row (unique across all resources).
    external_id = models.CharField(_('Key'), unique=True, max_length=64)
    text = models.CharField(_('Text'), null=True, max_length=64)
    data = JSONField(_('Data'), null=True)
    pdf = models.FileField(_('PDF file'), upload_to=upload_to)
    created = models.DateTimeField(_('Created'), auto_now_add=True)
    updated = models.DateTimeField(_('Updated'), auto_now=True)

    class Meta:
        verbose_name = _('Foederis data')
        verbose_name_plural = _('Foederis datas')

    def __repr__(self):
        return '<Document "{}">'.format(self.external_id)

    def delete(self, *args, **kwargs):
        """Remove the attached PDF file, if any, before deleting the row."""
        attached_file = self.pdf
        if attached_file:
            # save=False: the row is about to be deleted anyway.
            attached_file.delete(save=False)
        return super().delete(*args, **kwargs)
|