# passerelle/passerelle/contrib/toulouse_foederis/models.py
#
# 376 lines
# 14 KiB
# Python

# passerelle - uniform access to multiple data sources and services
# Copyright (C) 2022 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import base64
import requests
from django.core.files.base import ContentFile
from django.db import models, transaction
from django.db.models import JSONField, Q
from django.http import Http404, HttpResponse
from django.urls import reverse
from django.utils.timezone import localtime, now
from django.utils.translation import gettext_lazy as _
from passerelle.base.models import BaseResource, HTTPResource
from passerelle.utils.api import endpoint
from passerelle.utils.json import datasource_array_schema, datasource_schema, response_schema
class UpdateError(Exception):
    """Raised when refreshing the locally cached Foederis data fails."""
class Resource(BaseResource, HTTPResource):
    """Connector caching Foederis referentials and job announces.

    Data fetched from the remote web service is stored in ``Document`` rows
    (reachable through the ``documents`` related manager) and served by the
    ``ds`` and ``announce`` endpoints defined below.
    """

    category = _('Business Process Connectors')

    url = models.URLField(_('Webservice Base URL'))
    api_key = models.CharField(_('API Key'), max_length=512)

    log_requests_errors = False

    class Meta:
        verbose_name = _('Foederis connector')
        verbose_name_plural = _('Foederis connectors')

    @property
    def referentiels_documents(self):
        """Queryset of the documents whose key is one of ``REFERENTIELS``."""
        # ``dummy`` (as elsewhere in this class) rather than ``_``, which is
        # the gettext alias at module level.
        return self.documents.filter(
            external_id__in=[document_id for document_id, dummy in self.REFERENTIELS]
        )

    @property
    def announces_documents(self):
        """Queryset of the documents whose key starts with ``announce-``."""
        return self.documents.filter(external_id__startswith='announce-')

    @property
    def last_update_referentiels(self):
        """Oldest ``updated`` timestamp among referential documents (``None`` if there are none)."""
        return self.referentiels_documents.aggregate(last_update=models.Min('updated'))['last_update']

    @property
    def last_update_announces(self):
        """Most recent ``updated`` timestamp among announce documents (``None`` if there are none)."""
        return self.announces_documents.aggregate(last_update=models.Max('updated'))['last_update']

    def http_request(self, method, path, **kwargs):
        """Call the web service and return the ``results`` member of its JSON reply.

        ``path`` is appended verbatim to the configured base URL and the API
        key is sent in the ``API-Key`` header. Extra keyword arguments are
        forwarded to ``self.requests.request``.

        Returns the value of ``results`` (an empty list when absent).

        Raises ``requests.RequestException`` when the HTTP status is an error,
        when the body is not a JSON object, or when its ``code`` member is
        not 200.
        """
        kwargs.setdefault('headers', {})['API-Key'] = self.api_key
        url = self.url + path
        response = self.requests.request(method, url, **kwargs)
        response.raise_for_status()
        try:
            data = response.json()
        except ValueError:
            raise requests.RequestException('content is not JSON')
        if not isinstance(data, dict):
            # A JSON array or scalar has no ``code``/``results`` members; report
            # it with the exception type callers already handle instead of
            # letting ``.get`` fail with AttributeError.
            raise requests.RequestException('content is not a JSON object')
        if data.get('code') != 200:
            raise requests.RequestException('code field is not 200, message=%s' % data.get('message'))
        return data.get('results', [])

    def update_referentiel(self, document_id, path):
        """Refresh the referential document ``document_id`` from ``data/<path>``.

        Each result is stored as ``{'id': name, 'text': name}``. An empty
        reply leaves the previously stored document untouched.

        Raises ``UpdateError`` when the web-service call fails.
        """
        try:
            results = self.http_request('GET', f'data/{path}?viewIntegrationName=api_publik')
        except requests.RequestException:
            raise UpdateError(_('Service is unavailable'))
        if not results:
            return
        data = [{'id': r['name'], 'text': r['name']} for r in results]
        self.documents.update_or_create(defaults={'data': data}, external_id=document_id)

    ANNOUNCES_FIELDS = [
        # response_field, document_field
        ('id', 'id'),
        ('date_publication', 'date'),
        ('date_fin_publication', 'date_fin_publication'),
        ('collectivite', 'collectivite'),
        ('direction', 'direction'),
        ('intitule_annonce', 'intitule'),
        ('orientation_recrutement_dgrh', 'orientation_recrutement'),
        ('cadre_emploi_depuis_ddr', 'cadre_emploi'),
        ('type_emploi_ddr', 'type_emploi'),
        ('categorie_ddr', 'categorie'),
        ('filiere_ddr', 'filiere'),
        ('intitule_structure_pour_offre', 'intitule_direction'),
        ('contenu_du_chapeau', 'chapeau'),
        ('missions_activites', 'description'),
        ('profil_recherche', 'profil'),
        # NOTE(review): 'informations_complmentaires' presumably mirrors the
        # remote field name as-is; confirm before "fixing" the spelling.
        ('informations_complmentaires', 'informations_complementaires'),
        ('reference_offre', 'reference_offre'),
    ]

    DEMANDE_DE_PERSONNEL_FIELDS = [
        # response_field, document_field
        ('missions', 'description'),
        ('profil_requis', 'profil'),
    ]

    ANNOUNCE_SCHEMA = {
        'type': 'object',
        'properties': {field: {'type': 'string'} for dummy, field in ANNOUNCES_FIELDS},
    }
    ANNOUNCE_SCHEMA['properties']['pdf_url'] = {
        'type': 'string',
        'description': _('Public URL of the PDF announce'),
    }

    # Key, in an announce record, of the list of linked "demande_de_personnel" ids.
    FIELD_ANNOUNCE_FKEY_DEMANDE_DE_PERSONNEL = 'R14848258'

    def update_announce(self, response_announce):
        """Create or update the document caching one announce record.

        The document data is built from ``ANNOUNCES_FIELDS``. When the record
        links to a "demande_de_personnel", the fields listed in
        ``DEMANDE_DE_PERSONNEL_FIELDS`` found in the first linked record
        override the corresponding values. When the record has a ``pdf_ddr``
        value, the PDF is downloaded and stored alongside the document.

        Nothing is written when the stored data and text already match (the
        PDF content is not part of that comparison).

        Raises ``UpdateError`` when a web-service call fails.
        """
        document_data = {
            document_field: response_announce.get(response_field)
            for response_field, document_field in self.ANNOUNCES_FIELDS
        }
        file_content = None

        # retrieve HTML content fields
        # ``.get`` + truthiness: a record without the link field (or with a
        # null/empty one) simply has no linked "demande_de_personnel".
        recrut_ids = response_announce.get(self.FIELD_ANNOUNCE_FKEY_DEMANDE_DE_PERSONNEL)
        if recrut_ids:
            recrut_id = recrut_ids[0]
            try:
                fields = ','.join([x[0] for x in self.DEMANDE_DE_PERSONNEL_FIELDS])
                params = {
                    'filterName': 'id',
                    'filterValue': recrut_id,
                    'fieldList': fields,
                    'viewIntegrationName': 'api_publik',
                }
                results = self.http_request('GET', 'data/demande_de_personnel', params=params)
            except requests.RequestException:
                raise UpdateError(_('Service is unavailable'))
            if results:
                for response_field, document_field in self.DEMANDE_DE_PERSONNEL_FIELDS:
                    if response_field in results[0]:
                        document_data[document_field] = results[0][response_field]

        document_data['id'] = announce_id = str(response_announce['id'])
        external_id = f'announce-{announce_id}'
        text = document_data['text'] = document_data['intitule']

        if response_announce.get('pdf_ddr'):
            try:
                path = f'data/annonce/{announce_id}/fields/pdf_ddr?viewIntegrationName=api_publik'
                results = self.http_request('GET', path)
            except requests.RequestException:
                raise UpdateError(_('Service is unavailable'))
            if results:
                file_content = base64.b64decode(results[0]['pdf_ddr']['fileData'])

        with transaction.atomic(savepoint=False):
            document, created = self.documents.get_or_create(external_id=external_id)
            if document.data == document_data and document.text == text:
                return
            document.data = document_data
            document.text = text
            # drop the previously stored file before saving the new one (or none)
            if document.pdf:
                document.pdf.delete(save=False)
            if file_content:
                document.pdf.save(f'annonce-{announce_id}.pdf', ContentFile(file_content), save=False)
            else:
                document.pdf = None
            document.save()
        if created:
            self.logger.info(_('Created announce %s') % announce_id)
        else:
            self.logger.info(_('Updated announce %s') % announce_id)

    def update_announces(self):
        """Synchronise announce documents with the remote list of announces.

        Every announce returned by the web service is created or updated, and
        announce documents that are no longer listed are deleted.

        Raises ``UpdateError`` when a web-service call fails.
        """
        try:
            results = self.http_request('GET', 'data/annonce?viewIntegrationName=api_publik')
        except requests.RequestException:
            raise UpdateError(_('Service is unavailable'))
        announces = []
        for response_announce in results:
            self.update_announce(response_announce)
            announces.append('announce-%s' % response_announce['id'])
        # Delete stale documents one by one: a bulk ``QuerySet.delete()`` does
        # not call ``Document.delete()``, which is what removes the stored PDF.
        for document in self.announces_documents.exclude(external_id__in=announces):
            document.delete()

    REFERENTIELS = [
        # document_id, path
        ('type_emploi', 'type_emploi'),
        ('categorie', 'categorie1'),
        ('filiere', 'Filiere'),
    ]

    def update_referentiels(self):
        """Refresh every referential of ``REFERENTIELS``, then the announces.

        Raises ``UpdateError`` as soon as one web-service call fails.
        """
        for document_id, path in self.REFERENTIELS:
            self.update_referentiel(document_id, path)
        self.update_announces()

    def hourly(self):
        """Refresh all cached data, logging a warning instead of raising on failure.

        Presumably invoked periodically by the connector framework (not
        visible in this file).
        """
        try:
            self.update_referentiels()
        except UpdateError as e:
            self.logger.warning(_('Update failed: %s') % e)
        else:
            self.logger.info(_('Referentials updated.'))

    @endpoint(
        description=_('Get data source'),
        long_description=_('Available datasources: %s')
        % ', '.join(document_id.replace('_', '-') for document_id, dummy in REFERENTIELS),
        name='ds',
        pattern=r'^(?P<name>[a-z_-]+)/$',
        example_pattern='{name}/',
        perm='can_access',
        parameters={
            'name': {'description': _('Data source name'), 'example_value': 'type-emploi'},
        },
        json_schema_response=datasource_schema(),
    )
    def datasource(self, request, name):
        """Serve the cached referential ``name`` (dashes and underscores are interchangeable).

        Raises ``Http404`` for a name not listed in ``REFERENTIELS``. A known
        referential that has not been fetched yet yields an empty list and a
        ``None`` ``last_update``.
        """
        name = name.replace('-', '_')
        if not any(document_id == name for document_id, dummy in self.REFERENTIELS):
            raise Http404
        document = self.documents.filter(external_id=name).first()
        if not document:
            return {
                'err': 0,
                'data': [],
                'last_update': None,
            }
        return {
            'err': 0,
            'data': document.data,
            # explicit directives: '%F %T' are platform-specific shorthands
            # for exactly this format
            'last_update': localtime(document.updated).strftime('%Y-%m-%d %H:%M:%S'),
        }

    @endpoint(
        description=_('Retrieve announce\'s PDF'),
        long_description=_('Do not use directly, use the pdf_url field of announces instead.'),
        name='announce',
        pattern=r'^(?P<announce_id>[0-9]+)/pdf/$',
        example_pattern='{announce_id}/pdf/',
        parameters={
            'announce_id': {'description': _('Announce id'), 'example_value': '12345'},
        },
    )
    def announce_pdf(self, request, announce_id):
        """Return the PDF stored for announce ``announce_id`` as an HTTP response.

        Raises ``Http404`` when the announce exists but has no stored PDF.
        """
        # passerelle catch DoesNotExist and converts it to 404
        document = self.documents.get(external_id=f'announce-{announce_id}')
        if not document.pdf:
            # opening an empty file field raises ValueError; a missing PDF is
            # a "not found" condition, not a server error
            raise Http404
        with document.pdf.open() as fd:
            return HttpResponse(fd, content_type='application/pdf')

    @endpoint(
        description=_('List announces'),
        long_description=_(
            'List published announces. Use unpublished=1 parameter to see all announces. When using id to retrieve a specific announce, filters are ignored.'
        ),
        name='announce',
        perm='can_access',
        parameters={
            'q': {'description': _('Free text search')},
            'id': {'description': _('Get a specific announce')},
            'type_emploi': {'description': _('Filter by job type')},
            'categorie': {'description': _('Filter by job category')},
            'filiere': {'description': _('Filter by job sector')},
            'collectivite': {'description': _('Filter by collectivite')},
            'unpublished': {'description': _('Add unpublished announces to the list')},
        },
        json_schema_response=response_schema(
            {'type': 'array', 'items': ANNOUNCE_SCHEMA},
            toplevel_properties={
                'data_sources': {
                    'type': 'object',
                    'patternProperties': {
                        '': datasource_array_schema(),
                    },
                }
            },
        ),
    )
    def announce(
        self,
        request,
        q=None,
        id=None,
        type_emploi=None,
        categorie=None,
        filiere=None,
        collectivite=None,
        unpublished=None,
    ):
        """List cached announces, most recent publication date first.

        With ``id``, only that announce is returned and every other filter is
        ignored. Otherwise announces outside their publication window are
        hidden unless ``unpublished`` is one of ``1``, ``true`` or ``on``
        (case-insensitive), and the remaining parameters narrow the list
        (``q`` is a case-insensitive substring match on the title).

        The reply also carries the cached referentials under ``data_sources``
        and adds a ``pdf_url`` to every announce.
        """
        unpublished = bool(unpublished and unpublished.lower() in ['1', 'true', 'on'])
        qs = self.announces_documents
        qs = qs.order_by('-data__date')
        if id:
            qs = qs.filter(external_id=f'announce-{id}')
        else:
            # NOTE(review): now() is UTC when USE_TZ is enabled, so "today" can
            # lag the local date around midnight; confirm this is acceptable.
            today = now().date().strftime('%Y-%m-%d')
            if not unpublished:
                # update_announce() stores every key, using JSON null for a
                # missing value. ``key__isnull=True`` only matches an absent
                # key, so JSON null must be matched explicitly with ``=None``.
                qs = qs.filter(
                    Q(data__date__isnull=True) | Q(data__date=None) | Q(data__date__lte=today)
                )
                qs = qs.filter(
                    Q(data__date_fin_publication__isnull=True)
                    | Q(data__date_fin_publication=None)
                    | Q(data__date_fin_publication__gte=today)
                )
            if q:
                qs = qs.filter(data__intitule__icontains=q)
            if type_emploi:
                qs = qs.filter(data__type_emploi=type_emploi)
            if categorie:
                qs = qs.filter(data__categorie=categorie)
            if filiere:
                qs = qs.filter(data__filiere=filiere)
            if collectivite:
                qs = qs.filter(data__collectivite=collectivite)

        data_sources = {document.external_id: document.data for document in self.referentiels_documents}

        def pdf_url(request, document):
            # absolute URL of the announce_pdf endpoint for this document
            doc_id = document.external_id.split('-')[-1]
            return request.build_absolute_uri(
                reverse(
                    'generic-endpoint',
                    kwargs={
                        'connector': self.get_connector_slug(),
                        'slug': self.slug,
                        'endpoint': 'announce',
                        'rest': f'{doc_id}/pdf/',
                    },
                )
            )

        return {
            'err': 0,
            'data': [dict(document.data, pdf_url=pdf_url(request, document)) for document in qs],
            'data_sources': data_sources,
        }
def upload_to(instance, filename):
    """Return the storage path of ``filename``, namespaced by the owning resource's slug."""
    slug = instance.resource.slug
    return f'toulouse_foederis/{slug}/{filename}'
class Document(models.Model):
    """Piece of data cached for a ``Resource``, identified by ``external_id``."""

    resource = models.ForeignKey(
        Resource,
        verbose_name=_('Resource'),
        related_name='documents',
        on_delete=models.CASCADE,
    )
    external_id = models.CharField(verbose_name=_('Key'), max_length=64, unique=True)
    text = models.CharField(verbose_name=_('Text'), max_length=64, null=True)
    data = JSONField(verbose_name=_('Data'), null=True)
    pdf = models.FileField(verbose_name=_('PDF file'), upload_to=upload_to)
    created = models.DateTimeField(verbose_name=_('Created'), auto_now_add=True)
    updated = models.DateTimeField(verbose_name=_('Updated'), auto_now=True)

    class Meta:
        verbose_name = _('Foederis data')
        verbose_name_plural = _('Foederis datas')

    def __repr__(self):
        return '<Document "{}">'.format(self.external_id)

    def delete(self, *args, **kwargs):
        """Delete the row, first removing the stored PDF file when there is one."""
        stored_pdf = self.pdf
        if stored_pdf:
            # save=False: the row is about to be deleted, no need to update it
            stored_pdf.delete(save=False)
        return super().delete(*args, **kwargs)