220 lines
7.8 KiB
Python
220 lines
7.8 KiB
Python
# passerelle - uniform access to multiple data sources and services
# Copyright (C) 2020 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
import re
|
|
from urllib import parse as urlparse
|
|
|
|
from django.db import models
|
|
from django.shortcuts import get_object_or_404
|
|
from django.urls import reverse
|
|
from django.utils.translation import gettext_lazy as _
|
|
from requests import RequestException
|
|
|
|
from passerelle.base.models import BaseQuery, BaseResource
|
|
from passerelle.utils.api import endpoint
|
|
from passerelle.utils.jsonresponse import APIError
|
|
from passerelle.utils.templates import render_to_string, validate_template
|
|
|
|
|
|
class OpenDataSoft(BaseResource):
    """Connector querying the ``api/records/1.0/search/`` API of a site.

    Saved searches are stored as ``Query`` rows (reverse accessor
    ``queries``); they are exported and imported together with the connector.
    """

    service_url = models.CharField(
        _('Site URL'),
        max_length=256,
        blank=False,
        help_text=_('URL without ending "api/records/1.0/search/"'),
    )
    api_key = models.CharField(
        _('API key'),
        max_length=128,
        blank=True,
        help_text=_('API key used as credentials'),
    )

    category = _('Data Sources')

    class Meta:
        verbose_name = _('OpenDataSoft Web Service')

    def export_json(self):
        """Return the parent export extended with a ``queries`` list."""
        data = super().export_json()
        data['queries'] = [query.export_json() for query in self.queries.all()]
        return data

    @classmethod
    def import_json_real(cls, overwrite, instance, d, **kwargs):
        """Import the connector from ``d``, then recreate its queries.

        The ``queries`` entry is removed from ``d`` before delegating to the
        parent implementation. When ``overwrite`` is set and an instance was
        returned, its existing queries are deleted first. Returns the
        imported instance.
        """
        data_queries = d.pop('queries', [])
        instance = super().import_json_real(overwrite, instance, d, **kwargs)
        if instance and overwrite:
            Query.objects.filter(resource=instance).delete()
        queries = []
        for data_query in data_queries:
            query = Query.import_json(data_query)
            query.resource = instance
            queries.append(query)
        Query.objects.bulk_create(queries)
        return instance

    def call_search(
        self, dataset=None, text_template='', filter_expression='', sort=None, limit=None, id=None, q=None
    ):
        """Run a records search and return a list of flattened records.

        Parameters:
            dataset: value sent as the ``dataset`` request parameter.
            text_template: template rendered against each record's fields to
                build its ``text`` entry.
            filter_expression: query-string formatted extra parameters
                (``key=value&key=value``), merged last into the request.
            sort: value of the ``sort`` parameter; only sent when neither
                ``id`` nor ``q`` is given.
            limit: sent as ``rows`` when truthy.
            id: when not None, the search is restricted to ``recordid:<id>``
                and ``q`` is ignored.
            q: free text; it is split on non-word characters, then one-letter
                terms and the and/or/not operators are dropped.

        Returns:
            A list of dicts: the record fields (``id`` and ``text`` fields are
            renamed ``original_id`` / ``original_text``), plus ``id`` (the
            record identifier) and ``text`` (the rendered template).

        Raises:
            APIError: on a transport error, an ``error`` entry in the JSON
                payload, an HTTP error status, or a missing/invalid payload.
        """
        # NOTE(review): urljoin() replaces the last path segment when the
        # configured URL path does not end with "/" - confirm that is wanted.
        scheme, netloc, path, params, query, fragment = urlparse.urlparse(self.service_url)
        path = urlparse.urljoin(path, 'api/records/1.0/search/')
        url = urlparse.urlunparse((scheme, netloc, path, params, query, fragment))

        params = {'dataset': dataset}
        if id is not None:
            params['q'] = 'recordid:%s' % id
        elif q is not None:
            # remove query language operators
            terms = re.split(r'[^\w]', q)
            terms = [term for term in terms if len(term) > 1 and term.lower() not in ['and', 'or', 'not']]
            params['q'] = ' '.join(terms)
        elif sort:
            params['sort'] = sort
        if self.api_key:
            params['apikey'] = self.api_key
        if limit:
            params['rows'] = limit
        # Applied last: the filter expression may override the values above.
        params.update(urlparse.parse_qs(filter_expression))

        try:
            response = self.requests.get(url, params=params)
        except RequestException as e:
            raise APIError('OpenDataSoft error: %s' % e) from e

        try:
            json_response = response.json()
        except ValueError:
            json_response = None
        if not isinstance(json_response, dict):
            # Valid JSON that is not an object (list, string, number...) has
            # no .get(); handle it like an unparsable body.
            json_response = None

        # An error reported in the payload is checked before the HTTP status
        # so that its message is the one raised.
        if json_response and json_response.get('error'):
            raise APIError(json_response.get('error'))
        try:
            response.raise_for_status()
        except RequestException as e:
            raise APIError('OpenDataSoft error: %s' % e) from e
        if not json_response:
            raise APIError('OpenDataSoft error: bad JSON response')

        result = []
        # A missing or null "records" / "fields" entry is treated as empty
        # instead of failing with a TypeError or AttributeError.
        for record in json_response.get('records') or []:
            data = {}
            for key, value in (record.get('fields') or {}).items():
                # "id" and "text" are reserved for the entries set below.
                if key in ('id', 'text'):
                    key = 'original_%s' % key
                data[key] = value
            data['id'] = record.get('recordid')
            data['text'] = render_to_string(text_template, data).strip()
            result.append(data)

        return result

    # Generic search endpoint: every search setting comes from the request
    # parameters and no filter expression is applied.
    @endpoint(
        description=_('Search'),
        parameters={
            'dataset': {'description': _('Dataset')},
            'text_template': {'description': _('Text template')},
            'sort': {'description': _('Sort field')},
            'limit': {'description': _('Maximum items')},
            'id': {'description': _('Record identifier')},
            'q': {'description': _('Full text query')},
        },
    )
    def search(
        self, request, dataset=None, text_template='', sort=None, limit=None, id=None, q=None, **kwargs
    ):
        result = self.call_search(dataset, text_template, '', sort, limit, id, q)
        return {'data': result}

    # Saved query endpoint: looks up the Query of this connector matching the
    # slug in the URL (404 when there is none) and runs it.
    @endpoint(
        name='q',
        description=_('Query'),
        pattern=r'^(?P<query_slug>[\w:_-]+)/$',
        show=False,
    )
    def q(self, request, query_slug, **kwargs):
        query = get_object_or_404(Query, resource=self, slug=query_slug)
        result = query.q(request, **kwargs)
        return {'data': result}

    def create_query_url(self):
        """Return the URL of the 'opendatasoft-query-new' view for this connector."""
        return reverse('opendatasoft-query-new', kwargs={'slug': self.slug})
|
|
|
|
|
|
class Query(BaseQuery):
    """Saved search attached to an ``OpenDataSoft`` connector.

    It stores the dataset, text template, filter expression, sort field and
    limit that are handed to ``OpenDataSoft.call_search`` when it is run.
    """

    resource = models.ForeignKey(
        to=OpenDataSoft, related_name='queries', verbose_name=_('Resource'), on_delete=models.CASCADE
    )
    dataset = models.CharField(
        _('Dataset'),
        max_length=128,
        blank=False,
        help_text=_('dataset to query'),
    )
    text_template = models.TextField(
        verbose_name=_('Text template'),
        help_text=_("Use Django's template syntax. Attributes can be accessed through {{ attributes.name }}"),
        validators=[validate_template],
        blank=True,
    )
    filter_expression = models.TextField(
        verbose_name=_('filter'),
        help_text=_('Specify refine and exclude facet expressions separated lines'),
        blank=True,
    )
    sort = models.CharField(
        verbose_name=_('Sort field'),
        help_text=_(
            'Sorts results by the specified field. A minus sign - may be used to perform an ascending sort.'
        ),
        max_length=256,
        blank=True,
    )
    limit = models.PositiveIntegerField(
        default=10,
        verbose_name='Limit',
        help_text=_('Number of results to return in a single call'),
    )

    delete_view = 'opendatasoft-query-delete'
    edit_view = 'opendatasoft-query-edit'

    def q(self, request, **kwargs):
        """Run the saved search on the parent resource and return its result.

        Only the ``id`` and ``q`` keyword arguments are forwarded; all other
        settings come from the stored fields.
        """
        # One filter per line in the stored text; blank lines are dropped and
        # the rest is joined into a single query-string style expression.
        stripped_lines = (line.strip() for line in str(self.filter_expression).splitlines())
        joined_filters = '&'.join(line for line in stripped_lines if line)
        return self.resource.call_search(
            dataset=self.dataset,
            text_template=self.text_template,
            filter_expression=joined_filters,
            sort=self.sort,
            limit=self.limit,
            id=kwargs.get('id'),
            q=kwargs.get('q'),
        )

    def as_endpoint(self):
        """Return an endpoint description for this query.

        It is built by the parent class under the path of the resource's
        ``q`` endpoint, then given the function and parameter descriptions
        of the resource's ``search`` endpoint.
        """
        search_info = self.resource.search.endpoint_info
        query_endpoint = super().as_endpoint(path=self.resource.q.endpoint_info.name)
        query_endpoint.func = search_info.func
        query_endpoint.show_undocumented_params = False

        # Copy generic params descriptions from original endpoint
        # if they are not overloaded by the query
        for name in search_info.parameters:
            set_by_query = name in ('dataset', 'text_template') and getattr(self, name)
            if not set_by_query:
                query_endpoint.parameters[name] = search_info.parameters[name]
        return query_endpoint
|