data sources: add error management parameters (#44054)

2021-03-14 12:24:50 +01:00 · 2021-03-14 12:24:50 +01:00 · 61367b05dc
parent c27e98cc84
commit 61367b05dc
7 changed files with 231 additions and 84 deletions
--- a/tests/form_pages/test_all.py
+++ b/tests/form_pages/test_all.py
@ -24,6 +24,7 @@ from django.utils.encoding import force_bytes, force_text
 from wcs.qommon import force_str
 from wcs.qommon.emails import docutils
 from wcs.qommon.ident.password_accounts import PasswordAccount
+from wcs.qommon.misc import ConnectionError
 from wcs.carddef import CardDef
 from wcs.formdef import FormDef
 from wcs.workflows import (
@ -96,6 +97,13 @@ def pub(request, emails):
    return pub


+@pytest.fixture
+def error_email(pub):  # fixture: store an error_email address in the publisher's 'debug' config
+    pub.cfg['debug'] = {'error_email': 'errors@localhost.invalid'}  # replaces the whole 'debug' section
+    pub.write_cfg()
+    pub.set_config()  # presumably re-reads settings from pub.cfg -- TODO confirm
+
+
 def teardown_module(module):
    clean_temporary_pub()

@ -5662,7 +5670,7 @@ def test_items_field_with_disabled_items(http_requests, pub):
        assert formdef.data_class().select()[0].data['0_display'] == 'world'


-def test_item_field_autocomplete_json_source(http_requests, pub):
+def test_item_field_autocomplete_json_source(http_requests, pub, error_email, emails):
    user = create_user(pub)
    formdef = create_formdef()
    formdef.data_class().wipe()
@ -5752,6 +5760,33 @@ def test_item_field_autocomplete_json_source(http_requests, pub):
        # check unauthorized access
        resp2 = get_app(pub).get(select2_url + '?q=hell', status=403)

+    # check error handling in autocomplete endpoint
+    formdef.data_class().wipe()
+
+    app = get_app(pub)
+    with mock.patch('wcs.qommon.misc.urlopen') as urlopen:
+        urlopen.side_effect = ConnectionError('...')
+        resp = app.get('/test/')
+        assert urlopen.call_count == 0
+        pq = resp.pyquery.remove_namespaces()
+        select2_url = pq('select').attr['data-select2-url']
+
+        assert emails.count() == 0
+        resp2 = app.get(select2_url + '?q=hell')
+        assert urlopen.call_count == 1
+        assert urlopen.call_args[0][0] == 'http://remote.example.net/json?q=hell'
+        assert resp2.json == {'data': [], 'err': '1'}
+        assert emails.count() == 0
+
+        data_source.notify_on_errors = True
+        data_source.store()
+        resp2 = app.get(select2_url + '?q=hell')
+        assert emails.count() == 1
+        assert 'wcs.qommon.errors.ConnectionError: ...' in emails.get_latest('subject')
+
+        data_source.notify_on_errors = False
+        data_source.store()
+
    # simulate select2 mode, with qommon.forms.js adding an extra hidden widget
    resp.form.fields['f0_display'] = Hidden(form=resp.form, tag='input', name='f0_display', pos=10)
    resp.form['f0'].force_value('1')
--- a/tests/test_datasource.py
+++ b/tests/test_datasource.py
@ -53,6 +53,13 @@ def requests_pub(pub, request):
    return req


+@pytest.fixture
+def error_email(pub):  # fixture: store an error_email address in the publisher's 'debug' config
+    pub.cfg['debug'] = {'error_email': 'errors@localhost.invalid'}  # replaces the whole 'debug' section
+    pub.write_cfg()
+    pub.set_config()  # presumably re-reads settings from pub.cfg -- TODO confirm
+
+
 def test_item_field_python_datasource(requests_pub):
    req = get_request()
    req.environ['REQUEST_METHOD'] = 'POST'
@ -90,14 +97,6 @@ def test_python_datasource(pub):
        {'id': '2', 'text': 'bar'},
    ]

-    # invalid python expression
-    datasource = {'type': 'formula', 'value': 'foobar'}
-    assert data_sources.get_items(datasource) == []
-
-    # expression not iterable
-    datasource = {'type': 'formula', 'value': '2'}
-    assert data_sources.get_items(datasource) == []
-
    # three-item tuples
    plain_list = [('1', 'foo', 'a'), ('2', 'bar', 'b')]
    datasource = {'type': 'formula', 'value': repr(plain_list)}
@ -132,6 +131,18 @@ def test_python_datasource(pub):
    ]


+def test_python_datasource_errors(pub, error_email, http_requests, emails, caplog):  # formula errors: [] returned, email sent
+    # invalid python expression: get_items() must return [] and the "Failed to eval()" email is expected
+    datasource = {'type': 'formula', 'value': 'foobar', 'notify_on_errors': True}
+    assert data_sources.get_items(datasource) == []
+    assert 'Failed to eval() Python data source' in emails.get_latest('subject')  # subject of the latest captured email
+
+    # expression not iterable: '2' evaluates to an int, so the "non-iterable result" email is expected
+    datasource = {'type': 'formula', 'value': '2', 'notify_on_errors': True}
+    assert data_sources.get_items(datasource) == []
+    assert 'gave a non-iterable result' in emails.get_latest('subject')
+
+
 def test_python_datasource_with_evalutils(pub):
    plain_list = [
        {'id': 'foo', 'text': 'Foo', 'value': '2017-01-01'},
@ -365,41 +376,54 @@ def test_json_datasource(pub, requests_pub, http_requests):
    ]


-def test_json_datasource_bad_url(pub, http_requests, caplog):
+def test_json_datasource_bad_url(pub, error_email, http_requests, emails, caplog):
    datasource = {'type': 'json', 'value': 'http://remote.example.net/404'}
    assert data_sources.get_items(datasource) == []
-    assert 'Error loading JSON data source' in caplog.records[-1].message
-    assert 'status: 404' in caplog.records[-1].message
+    assert emails.count() == 0

-    datasource = {'type': 'json', 'value': 'http://remote.example.net/xml'}
+    datasource = {'type': 'json', 'value': 'http://remote.example.net/404', 'notify_on_errors': True}
    assert data_sources.get_items(datasource) == []
-    assert 'Error reading JSON data source output' in caplog.records[-1].message
-    assert 'Expecting value:' in caplog.records[-1].message
+    assert emails.count() == 1
+    assert 'error in HTTP request to http://remote.example.net/404 (status: 404)' in emails.get_latest(
+        'subject'
+    )

-    datasource = {'type': 'json', 'value': 'http://remote.example.net/connection-error'}
+    datasource = {'type': 'json', 'value': 'http://remote.example.net/xml', 'notify_on_errors': True}
    assert data_sources.get_items(datasource) == []
-    assert 'Error loading JSON data source' in caplog.records[-1].message
-    assert 'error' in caplog.records[-1].message
+    assert emails.count() == 2
+    assert 'Error reading JSON data source' in emails.get_latest('subject')

-    datasource = {'type': 'json', 'value': 'http://remote.example.net/json-list-err1'}
+    datasource = {
+        'type': 'json',
+        'value': 'http://remote.example.net/connection-error',
+        'notify_on_errors': True,
+    }
    assert data_sources.get_items(datasource) == []
-    assert 'Error reading JSON data source output (err 1)' in caplog.records[-1].message
+    assert 'Error loading JSON data source' in emails.get_latest('subject')

-
-def test_json_datasource_bad_url_scheme(pub, caplog):
-    datasource = {'type': 'json', 'value': ''}
+    datasource = {
+        'type': 'json',
+        'value': 'http://remote.example.net/json-list-err1',
+        'notify_on_errors': True,
+    }
    assert data_sources.get_items(datasource) == []
-    assert caplog.records[-1].message == 'Empty URL in JSON data source'
+    assert 'Error reading JSON data source output (err 1)' in emails.get_latest('subject')

-    datasource = {'type': 'json', 'value': 'foo://bar'}
-    assert data_sources.get_items(datasource) == []
-    assert 'Error loading JSON data source' in caplog.records[-1].message
-    assert 'invalid scheme in URL' in caplog.records[-1].message

-    datasource = {'type': 'json', 'value': '/bla/blo'}
+def test_json_datasource_bad_url_scheme(pub, error_email, emails):
+    datasource = {'type': 'json', 'value': '', 'notify_on_errors': True}
    assert data_sources.get_items(datasource) == []
-    assert 'Error loading JSON data source' in caplog.records[-1].message
-    assert 'invalid scheme in URL' in caplog.records[-1].message
+    assert emails.count() == 0
+
+    datasource = {'type': 'json', 'value': 'foo://bar', 'notify_on_errors': True}
+    assert data_sources.get_items(datasource) == []
+    assert 'Error loading JSON data source' in emails.get_latest('subject')
+    assert 'invalid scheme in URL' in emails.get_latest('subject')
+
+    datasource = {'type': 'json', 'value': '/bla/blo', 'notify_on_errors': True}
+    assert data_sources.get_items(datasource) == []
+    assert 'Error loading JSON data source' in emails.get_latest('subject')
+    assert 'invalid scheme in URL' in emails.get_latest('subject')


 def test_geojson_datasource(pub, requests_pub, http_requests):
@ -741,41 +765,49 @@ def test_geojson_datasource(pub, requests_pub, http_requests):
    ]


-def test_geojson_datasource_bad_url(pub, http_requests, caplog):
-    datasource = {'type': 'geojson', 'value': 'http://remote.example.net/404'}
+def test_geojson_datasource_bad_url(pub, http_requests, error_email, emails):
+    datasource = {'type': 'geojson', 'value': 'http://remote.example.net/404', 'notify_on_errors': True}
    assert data_sources.get_items(datasource) == []
-    assert 'Error loading JSON data source' in caplog.records[-1].message
-    assert 'status: 404' in caplog.records[-1].message
+    assert 'Error loading JSON data source' in emails.get_latest('subject')
+    assert 'status: 404' in emails.get_latest('subject')

-    datasource = {'type': 'geojson', 'value': 'http://remote.example.net/xml'}
+    datasource = {'type': 'geojson', 'value': 'http://remote.example.net/xml', 'notify_on_errors': True}
    assert data_sources.get_items(datasource) == []
-    assert 'Error reading JSON data source output' in caplog.records[-1].message
-    assert 'Expecting value:' in caplog.records[-1].message
+    assert 'Error reading JSON data source output' in emails.get_latest('subject')
+    assert 'Expecting value:' in emails.get_latest('subject')

-    datasource = {'type': 'geojson', 'value': 'http://remote.example.net/connection-error'}
+    datasource = {
+        'type': 'geojson',
+        'value': 'http://remote.example.net/connection-error',
+        'notify_on_errors': True,
+    }
    assert data_sources.get_items(datasource) == []
-    assert 'Error loading JSON data source' in caplog.records[-1].message
-    assert 'error' in caplog.records[-1].message
+    assert 'Error loading JSON data source' in emails.get_latest('subject')
+    assert 'error' in emails.get_latest('subject')

-    datasource = {'type': 'geojson', 'value': 'http://remote.example.net/json-list-err1'}
+    datasource = {
+        'type': 'geojson',
+        'value': 'http://remote.example.net/json-list-err1',
+        'notify_on_errors': True,
+    }
    assert data_sources.get_items(datasource) == []
-    assert 'Error reading JSON data source output (err 1)' in caplog.records[-1].message
+    assert 'Error reading JSON data source output (err 1)' in emails.get_latest('subject')


-def test_geojson_datasource_bad_url_scheme(pub, caplog):
+def test_geojson_datasource_bad_url_scheme(pub, error_email, emails):
    datasource = {'type': 'geojson', 'value': ''}
    assert data_sources.get_items(datasource) == []
-    assert caplog.records[-1].message == 'Empty URL in GeoJSON data source'
+    assert emails.count() == 0

-    datasource = {'type': 'geojson', 'value': 'foo://bar'}
+    datasource = {'type': 'geojson', 'value': 'foo://bar', 'notify_on_errors': True}
    assert data_sources.get_items(datasource) == []
-    assert 'Error loading JSON data source' in caplog.records[-1].message
-    assert 'invalid scheme in URL' in caplog.records[-1].message
+    assert 'Error loading JSON data source' in emails.get_latest('subject')
+    assert 'invalid scheme in URL' in emails.get_latest('subject')

-    datasource = {'type': 'geojson', 'value': '/bla/blo'}
+    datasource = {'type': 'geojson', 'value': '/bla/blo', 'notify_on_errors': True}
    assert data_sources.get_items(datasource) == []
-    assert 'Error loading JSON data source' in caplog.records[-1].message
-    assert 'invalid scheme in URL' in caplog.records[-1].message
+    assert 'Error loading JSON data source' in emails.get_latest('subject')
+    assert 'invalid scheme in URL' in emails.get_latest('subject')


 def test_item_field_named_python_datasource(requests_pub):
--- a/tests/utilities.py
+++ b/tests/utilities.py
@ -245,8 +245,8 @@ def login(app, username='admin', password='admin'):
 class EmailsMocking(object):
    def create_smtp_server(self, *args, **kwargs):
        class MockSmtplibSMTP(object):
-            def __init__(self, emails):
-                self.emails = emails
+            def __init__(self, mocking):
+                self.mocking = mocking

            def send_message(self, msg, msg_from, rcpts):
                return self.sendmail(msg_from, rcpts, msg.as_string())
@ -260,23 +260,31 @@ class EmailsMocking(object):
                else:
                    payload = msg.get_payload(decode=True)
                    payloads = [payload]
-                self.emails[force_text(subject)] = {
+                self.mocking.emails[force_text(subject)] = {
                    'from': msg_from,
                    'to': email.header.decode_header(msg['To'])[0][0],
                    'payload': force_str(payload if payload else ''),
                    'payloads': payloads,
                    'msg': msg,
+                    'subject': force_text(subject),
                }
-                self.emails[force_text(subject)]['email_rcpt'] = rcpts
+                self.mocking.emails[force_text(subject)]['email_rcpt'] = rcpts
+                self.mocking.latest_subject = force_text(subject)

            def quit(self):
                pass

-        return MockSmtplibSMTP(self.emails)
+        return MockSmtplibSMTP(self)

    def get(self, subject):
        return self.emails.get(subject)

+    def get_latest(self, part=None):  # most recently captured email, or a single field of it when `part` is given
+        email = self.emails.get(self.latest_subject, {})  # {} when nothing was sent yet or the entry was cleared
+        if part:
+            return email.get(part) if email else None  # None when there is no such email or no such field
+        return email
+
    def empty(self):
        self.emails.clear()

@ -287,6 +295,7 @@ class EmailsMocking(object):
        self.wcs_create_smtp_server = sys.modules['wcs.qommon.emails'].create_smtp_server
        sys.modules['wcs.qommon.emails'].create_smtp_server = self.create_smtp_server
        self.emails = {}
+        self.latest_subject = None
        return self

    def __exit__(self, exc_type, exc_value, tb):
@ -407,7 +416,7 @@ class HttpRequestsMocking(object):
            raise ConnectionError('error')

        if raise_on_http_errors and not (200 <= status < 300):
-            raise ConnectionError('error in HTTP request to (status: %s)' % status)
+            raise ConnectionError('error in HTTP request to %s (status: %s)' % (url, status))

        return FakeResponse(status, data, headers), status, data, None

--- a/wcs/admin/data_sources.py
+++ b/wcs/admin/data_sources.py
@ -187,6 +187,19 @@ class NamedDataSourceUI(object):
                'data-dynamic-display-value': 'json',
            },
        )
+        form.add(
+            CheckboxWidget,
+            'notify_on_errors',
+            title=_('Notify on errors'),
+            value=self.datasource.notify_on_errors,
+        )
+        form.add(
+            CheckboxWidget,
+            'record_on_errors',
+            title=_('Record on errors'),
+            value=self.datasource.record_on_errors,
+        )
+
        if not self.datasource.is_readonly():
            form.add_submit('submit', _('Submit'))
        form.add_submit('cancel', _('Cancel'))
@ -209,18 +222,14 @@ class NamedDataSourceUI(object):
            raise ValueError()

        self.datasource.name = name
-        self.datasource.description = form.get_widget('description').parse()
-        self.datasource.data_source = form.get_widget('data_source')
-        self.datasource.cache_duration = form.get_widget('cache_duration').parse()
-        self.datasource.query_parameter = form.get_widget('query_parameter').parse()
-        self.datasource.id_parameter = form.get_widget('id_parameter').parse()
-        self.datasource.data_attribute = form.get_widget('data_attribute').parse()
-        self.datasource.id_attribute = form.get_widget('id_attribute').parse()
-        self.datasource.text_attribute = form.get_widget('text_attribute').parse()
-        self.datasource.id_property = form.get_widget('id_property').parse()
-        self.datasource.label_template_property = form.get_widget('label_template_property').parse()
        if slug_widget:
            self.datasource.slug = slug
+
+        for widget in form.widgets:
+            if widget.name in ('name', 'slug'):
+                continue
+            setattr(self.datasource, widget.name, widget.parse())
+
        self.datasource.store()


--- a/wcs/api.py
+++ b/wcs/api.py
@ -17,6 +17,7 @@
 import datetime
 import json
 import re
+import sys
 import time
 import urllib.parse

@ -29,7 +30,12 @@ from django.http import HttpResponse, HttpResponseBadRequest, JsonResponse

 from .qommon import _
 from .qommon import misc
-from .qommon.errors import AccessForbiddenError, TraversalError, UnknownNameIdAccessForbiddenError
+from .qommon.errors import (
+    AccessForbiddenError,
+    TraversalError,
+    UnknownNameIdAccessForbiddenError,
+    ConnectionError,
+)
 from .qommon.form import ComputedExpressionWidget
 from .qommon.storage import Equal, NotEqual

@ -37,6 +43,7 @@ from wcs.categories import Category
 from wcs.conditions import Condition, ValidationError
 from wcs.carddef import CardDef
 from wcs.formdef import FormDef
+from wcs.data_sources import NamedDataSource
 from wcs.data_sources import get_object as get_data_source_object
 from wcs.roles import Role, logged_users_role
 from wcs.forms.common import FormStatusPage
@ -1022,7 +1029,19 @@ class AutocompleteDirectory(Directory):
            url += urllib.parse.quote(get_request().form['q'])
            url = sign_url_auto_orig(url)
            get_response().set_content_type('application/json')
-            return misc.urlopen(url).read()
+            try:
+                return misc.urlopen(url).read()
+            except ConnectionError:
+                if 'data_source' in info:
+                    data_source = NamedDataSource.get(info['data_source'])
+                    exc_info = sys.exc_info()
+                    get_publisher().notify_of_exception(
+                        exc_info,
+                        context='[DATASOURCE]',
+                        notify=data_source.notify_on_errors,
+                        record=data_source.record_on_errors,
+                    )
+                return json.dumps({'data': [], 'err': '1'})

        # carddef_ref in info
        carddef_ref = info['carddef_ref']
--- a/wcs/data_sources.py
+++ b/wcs/data_sources.py
@ -16,6 +16,7 @@

 import collections
 import hashlib
+import sys
 import urllib.parse
 import xml.etree.ElementTree as ET

@ -165,6 +166,8 @@ def get_json_from_url(url, data_source=None, log_message_part='JSON data source'
    data_source = data_source or {}
    data_key = data_source.get('data_attribute') or 'data'
    geojson = data_source.get('type') == 'geojson'
+    error_summary = None
+    exc = None
    try:
        entries = misc.json_loads(misc.urlopen(url).read())
        if not isinstance(entries, dict):
@ -177,13 +180,27 @@ def get_json_from_url(url, data_source=None, log_message_part='JSON data source'
        else:
            if not isinstance(entries.get(data_key), list):
                raise ValueError('not a json dict with a %s list attribute' % data_key)
+        return entries
    except misc.ConnectionError as e:
-        get_logger().warning('Error loading %s (%s)' % (log_message_part, str(e)))
-        return None
+        error_summary = 'Error loading %s (%s)' % (log_message_part, str(e))
+        exc = e
    except (ValueError, TypeError) as e:
-        get_logger().warning('Error reading %s output (%s)' % (log_message_part, str(e)))
-        return None
-    return entries
+        error_summary = 'Error reading %s output (%s)' % (log_message_part, str(e))
+        exc = e
+
+    if data_source and (data_source.get('record_on_errors') or data_source.get('notify_on_errors')):
+        try:
+            raise Exception(error_summary) from exc
+        except Exception:
+            exc_info = sys.exc_info()
+        get_publisher().notify_of_exception(
+            exc_info,
+            context='[DATASOURCE]',
+            notify=data_source.get('notify_on_errors'),
+            record=data_source.get('record_on_errors'),
+        )
+
+    return None


 def request_json_items(url, data_source):
@ -271,8 +288,17 @@ def get_structured_items(data_source, mode=None):
        try:
            value = eval(data_source.get('value'), global_eval_dict, variables)
            if not isinstance(value, collections.Iterable):
-                get_logger().warning(
-                    'Python data source (%r) gave a non-iterable result' % data_source.get('value')
+                try:
+                    raise Exception(
+                        'Python data source (%r) gave a non-iterable result' % data_source.get('value')
+                    )
+                except Exception:
+                    exc_info = sys.exc_info()
+                get_publisher().notify_of_exception(
+                    exc_info,
+                    context='[DATASOURCE]',
+                    notify=data_source.get('notify_on_errors'),
+                    record=data_source.get('record_on_errors'),
                )
                return []
            if len(value) == 0:
@ -289,8 +315,19 @@ def get_structured_items(data_source, mode=None):
            elif isinstance(value[0], str):
                return [{'id': x, 'text': x} for x in value]
            return value
-        except:
-            get_logger().warning('Failed to eval() Python data source (%r)' % data_source.get('value'))
+        except Exception as exc:
+            try:
+                raise Exception(
+                    'Failed to eval() Python data source (%r)' % data_source.get('value')
+                ) from exc
+            except Exception:
+                exc_info = sys.exc_info()
+            get_publisher().notify_of_exception(
+                exc_info,
+                context='[DATASOURCE]',
+                notify=data_source.get('notify_on_errors'),
+                record=data_source.get('record_on_errors'),
+            )
            return []
    elif data_source.get('type') in ['json', 'geojson']:
        # the content available at a json URL, it must answer with a dict with
@ -299,10 +336,6 @@ def get_structured_items(data_source, mode=None):
        geojson = data_source.get('type') == 'geojson'
        url = data_source.get('value')
        if not url:
-            if geojson:
-                get_logger().warning('Empty URL in GeoJSON data source')
-            else:
-                get_logger().warning('Empty URL in JSON data source')
            return []
        url = url.strip()
        if Template.is_template_string(url):
@ -382,6 +415,8 @@ class NamedDataSource(XmlStorableObject):
    label_template_property = None
    external = None
    external_status = None
+    notify_on_errors = False
+    record_on_errors = False

    # declarations for serialization
    XML_NODES = [
@ -399,6 +434,8 @@ class NamedDataSource(XmlStorableObject):
        ('external', 'str'),
        ('external_status', 'str'),
        ('data_source', 'data_source'),
+        ('notify_on_errors', 'bool'),
+        ('record_on_errors', 'bool'),
    ]

    def __init__(self, name=None):
@ -417,6 +454,8 @@ class NamedDataSource(XmlStorableObject):
                {
                    'id_property': self.id_property,
                    'label_template_property': self.label_template_property,
+                    'notify_on_errors': self.notify_on_errors,
+                    'record_on_errors': self.record_on_errors,
                }
            )
            return data_source
@ -427,6 +466,8 @@ class NamedDataSource(XmlStorableObject):
                    'data_attribute': self.data_attribute,
                    'id_attribute': self.id_attribute,
                    'text_attribute': self.text_attribute,
+                    'notify_on_errors': self.notify_on_errors,
+                    'record_on_errors': self.record_on_errors,
                }
            )
            return data_source
@ -527,7 +568,7 @@ class NamedDataSource(XmlStorableObject):
            json_url = self.get_json_query_url()
            info = None
            if json_url:
-                info = {'url': json_url}
+                info = {'url': json_url, 'data_source': self.id}
            return '/api/autocomplete/%s' % (get_session().get_data_source_query_info_token(info))
        if self.type and self.type.startswith('carddef:'):
            parts = self.type.split(':')
--- a/wcs/templates/wcs/backoffice/data-source.html
+++ b/wcs/templates/wcs/backoffice/data-source.html
@ -19,7 +19,7 @@
 <h3>{% trans "Configuration" %}</h3>
 <ul>
  <li>{% trans "Type of source:" %} {{ datasource.type_label }}</li>
-  {% if datasource.data_source.type == 'json' or datasource.data_source.type == 'jsonp' %}
+  {% if datasource.data_source.type == 'json' or datasource.data_source.type == 'jsonp' or datasource.data_source.type == 'geojson' %}
  <li>{% trans "URL:" %} <a href="{{ datasource.get_variadic_url }}">{{ datasource.data_source.value }}</a></li>
  {% elif datasource.data_source.type == 'formula' %}
  <li>{% trans "Python Expression:" %} {{ datasource.data_source.value }}</li>
@ -27,6 +27,8 @@
  {% if datasource.cache_duration %}
  <li>{% trans "Cache Duration:" %} {{ datasource.humanized_cache_duration }}
  {% endif %}
+  <li>{% trans "Notify on errors:" %} {{ datasource.notify_on_errors|yesno }}</li>
+  <li>{% trans "Record on errors:" %} {{ datasource.record_on_errors|yesno }}</li>
 </ul>
 </div>