passerelle/passerelle/apps/opendatasoft/models.py

220 lines
7.8 KiB
Python

# passerelle - uniform access to multiple data sources and services
# Copyright (C) 2020 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import re
from urllib import parse as urlparse
from django.db import models
from django.shortcuts import get_object_or_404
from django.urls import reverse
from django.utils.translation import gettext_lazy as _
from requests import RequestException
from passerelle.base.models import BaseQuery, BaseResource
from passerelle.utils.api import endpoint
from passerelle.utils.jsonresponse import APIError
from passerelle.utils.templates import render_to_string, validate_template
class OpenDataSoft(BaseResource):
    """Resource querying the records search API of an OpenDataSoft site.

    Searches are sent to ``api/records/1.0/search/`` below ``service_url``,
    optionally authenticated with ``api_key``.
    """

    service_url = models.CharField(
        _('Site URL'),
        max_length=256,
        blank=False,
        help_text=_('URL without ending "api/records/1.0/search/"'),
    )
    api_key = models.CharField(
        _('API key'),
        max_length=128,
        blank=True,
        help_text=_('API key used as credentials'),
    )

    category = _('Data Sources')

    class Meta:
        verbose_name = _('OpenDataSoft Web Service')

    def export_json(self):
        """Return the parent export extended with a ``queries`` list.

        Each entry of ``queries`` is the ``export_json()`` of one query
        attached to this resource.
        """
        data = super().export_json()
        data['queries'] = [query.export_json() for query in self.queries.all()]
        return data

    @classmethod
    def import_json_real(cls, overwrite, instance, d, **kwargs):
        """Import the resource described by ``d`` together with its queries.

        The ``queries`` entry is removed from ``d`` before delegating to the
        parent implementation. When ``overwrite`` is set and an instance was
        obtained, its existing queries are deleted before the imported ones
        are created in bulk.
        """
        data_queries = d.pop('queries', [])
        instance = super().import_json_real(overwrite, instance, d, **kwargs)
        queries = []
        if instance and overwrite:
            Query.objects.filter(resource=instance).delete()
        for data_query in data_queries:
            query = Query.import_json(data_query)
            query.resource = instance
            queries.append(query)
        Query.objects.bulk_create(queries)
        return instance

    def call_search(
        self, dataset=None, text_template='', filter_expression='', sort=None, limit=None, id=None, q=None
    ):
        """Run a records search and return the records as a list of dicts.

        Parameters:
            dataset: value sent as the ``dataset`` request parameter.
            text_template: template rendered for each record, with the record
                data as context, to produce its ``text`` value.
            filter_expression: URL query string; it is parsed with
                ``parse_qs`` and merged last into the request parameters, so
                it can override any parameter set before it.
            sort: sort field, only sent when neither ``id`` nor ``q`` is given.
            limit: sent as the ``rows`` parameter when truthy.
            id: record identifier; takes precedence over ``q`` and is sent as
                a ``recordid:<id>`` query.
            q: full text query; split on non-word characters, then terms of
                one character or less and the ``and``/``or``/``not``
                operators are dropped.

        Returns:
            One dict per record holding its fields (fields named ``id`` or
            ``text`` are renamed ``original_id``/``original_text``), plus
            ``id`` (the ``recordid``) and ``text`` (the rendered template).

        Raises:
            APIError: when the request fails, the JSON payload carries an
                ``error``, the HTTP status is an error, or the payload is not
                a JSON object holding a ``records`` list.
        """
        scheme, netloc, path, url_params, query, fragment = urlparse.urlparse(self.service_url)
        path = urlparse.urljoin(path, 'api/records/1.0/search/')
        url = urlparse.urlunparse((scheme, netloc, path, url_params, query, fragment))

        params = {'dataset': dataset}
        if id is not None:
            params['q'] = 'recordid:%s' % id
        elif q is not None:
            # remove query language operators (and terms shorter than 2 characters)
            terms = re.split(r'[^\w]', q)
            terms = [term for term in terms if len(term) > 1 and term.lower() not in ('and', 'or', 'not')]
            params['q'] = ' '.join(terms)
        elif sort:
            params['sort'] = sort
        if self.api_key:
            params['apikey'] = self.api_key
        if limit:
            params['rows'] = limit
        params.update(urlparse.parse_qs(filter_expression))

        try:
            response = self.requests.get(url, params=params)
        except RequestException as e:
            raise APIError('OpenDataSoft error: %s' % e) from e

        try:
            json_response = response.json()
        except ValueError:
            json_response = None
        if not isinstance(json_response, dict):
            # valid JSON that is not an object is as unusable as invalid JSON
            json_response = None

        # an error reported in the payload takes precedence over the HTTP status
        if json_response and json_response.get('error'):
            raise APIError(json_response.get('error'))

        try:
            response.raise_for_status()
        except RequestException as e:
            raise APIError('OpenDataSoft error: %s' % e) from e

        if not json_response:
            raise APIError('OpenDataSoft error: bad JSON response')

        records = json_response.get('records')
        if not isinstance(records, list):
            raise APIError('OpenDataSoft error: bad JSON response')

        result = []
        for record in records:
            data = {}
            for key, value in (record.get('fields') or {}).items():
                # keep 'id' and 'text' free for the values computed below
                if key in ('id', 'text'):
                    key = 'original_%s' % key
                data[key] = value
            data['id'] = record.get('recordid')
            data['text'] = render_to_string(text_template, data).strip()
            result.append(data)
        return result

    @endpoint(
        description=_('Search'),
        parameters={
            'dataset': {'description': _('Dataset')},
            'text_template': {'description': _('Text template')},
            'sort': {'description': _('Sort field')},
            'limit': {'description': _('Maximum items')},
            'id': {'description': _('Record identifier')},
            'q': {'description': _('Full text query')},
        },
    )
    def search(
        self, request, dataset=None, text_template='', sort=None, limit=None, id=None, q=None, **kwargs
    ):
        """Endpoint running ``call_search`` without any filter expression.

        Returns ``{'data': <list of records>}``.
        """
        result = self.call_search(
            dataset=dataset,
            text_template=text_template,
            filter_expression='',
            sort=sort,
            limit=limit,
            id=id,
            q=q,
        )
        return {'data': result}

    @endpoint(
        name='q',
        description=_('Query'),
        pattern=r'^(?P<query_slug>[\w:_-]+)/$',
        show=False,
    )
    def q(self, request, query_slug, **kwargs):
        """Endpoint running the query of this resource whose slug is ``query_slug``.

        Looks the query up with ``get_object_or_404`` and returns
        ``{'data': <list of records>}``.
        """
        query = get_object_or_404(Query, resource=self, slug=query_slug)
        result = query.q(request, **kwargs)
        return {'data': result}

    def create_query_url(self):
        """Return the URL of the ``opendatasoft-query-new`` view for this resource."""
        return reverse('opendatasoft-query-new', kwargs={'slug': self.slug})
class Query(BaseQuery):
    """Stored search on one dataset of an ``OpenDataSoft`` resource.

    The dataset, text template, filter expression, sort field and limit are
    stored on the query; only ``id`` and ``q`` are taken from the caller when
    the query is run.
    """

    resource = models.ForeignKey(
        to=OpenDataSoft,
        related_name='queries',
        verbose_name=_('Resource'),
        on_delete=models.CASCADE,
    )
    dataset = models.CharField(
        _('Dataset'),
        max_length=128,
        blank=False,
        help_text=_('dataset to query'),
    )
    text_template = models.TextField(
        verbose_name=_('Text template'),
        help_text=_("Use Django's template syntax. Attributes can be accessed through {{ attributes.name }}"),
        validators=[validate_template],
        blank=True,
    )
    filter_expression = models.TextField(
        verbose_name=_('filter'),
        help_text=_('Specify refine and exclude facet expressions separated lines'),
        blank=True,
    )
    sort = models.CharField(
        verbose_name=_('Sort field'),
        help_text=_(
            'Sorts results by the specified field. A minus sign - may be used to perform an ascending sort.'
        ),
        max_length=256,
        blank=True,
    )
    limit = models.PositiveIntegerField(
        default=10,
        verbose_name='Limit',
        help_text=_('Number of results to return in a single call'),
    )

    delete_view = 'opendatasoft-query-delete'
    edit_view = 'opendatasoft-query-edit'

    def q(self, request, **kwargs):
        """Run this query through the resource's ``call_search``.

        The non-blank lines of ``filter_expression`` are stripped and joined
        with ``&`` to form a single expression. ``id`` and ``q`` are read from
        ``kwargs`` and default to ``None``.
        """
        stripped_lines = (line.strip() for line in str(self.filter_expression).splitlines())
        joined_filters = '&'.join([line for line in stripped_lines if line])
        record_id = kwargs.get('id')
        full_text = kwargs.get('q')
        return self.resource.call_search(
            dataset=self.dataset,
            text_template=self.text_template,
            filter_expression=joined_filters,
            sort=self.sort,
            limit=self.limit,
            id=record_id,
            q=full_text,
        )

    def as_endpoint(self):
        """Return the endpoint description of this query.

        It is built by the parent class on the path of the resource's ``q``
        endpoint, then given the function and the parameter descriptions of
        the resource's ``search`` endpoint.
        """
        query_endpoint = super().as_endpoint(path=self.resource.q.endpoint_info.name)
        search_info = self.resource.search.endpoint_info
        query_endpoint.func = search_info.func
        query_endpoint.show_undocumented_params = False
        # Generic parameter descriptions are copied from the search endpoint,
        # except for 'dataset' and 'text_template' when this query sets them.
        for name in search_info.parameters:
            overloaded = name in ('dataset', 'text_template') and getattr(self, name)
            if not overloaded:
                query_endpoint.parameters[name] = search_info.parameters[name]
        return query_endpoint