From af81da4954917af8c41fdf36301ab498f67e8200 Mon Sep 17 00:00:00 2001 From: Benjamin Dauvergne Date: Tue, 19 Dec 2023 17:36:11 +0100 Subject: [PATCH] adapters: do not log errors on cold cache (#84933) Only log errors if the cache is older than 24 hours. --- mellon/adapters.py | 170 +++++++++++++++++++--------------- tests/test_default_adapter.py | 117 ++++++++++++----------- tox.ini | 2 +- 3 files changed, 157 insertions(+), 132 deletions(-) diff --git a/mellon/adapters.py b/mellon/adapters.py index d519d7f..dff3855 100644 --- a/mellon/adapters.py +++ b/mellon/adapters.py @@ -78,83 +78,100 @@ class DefaultAdapter: yield idp def load_metadata_path(self, idp, i): + now = time.time() path = idp['METADATA_PATH'] if not os.path.exists(path): - logger.warning('mellon: metadata path %s does not exist', path) + logger.error('mellon: metadata path %s does not exist', path) return - last_update = idp.get('METADATA_PATH_LAST_UPDATE', 0) - try: - mtime = os.stat(path).st_mtime - except OSError as e: - logger.warning('mellon: metadata path %s : stat() call failed, %s', path, e) - return - if last_update == 0 or mtime >= last_update: - idp['METADATA_PATH_LAST_UPDATE'] = time.time() + + metadata_last_update = idp.get('METADATA_LAST_UPDATE', None) + + if not idp.get('METADATA'): + should_load = True + elif not metadata_last_update: + should_load = True + else: try: - with open(path) as fd: - metadata = fd.read() + mtime = os.stat(path).st_mtime except OSError as e: - logger.warning('mellon: metadata path %s : open()/read() call failed, %s', path, e) + logger.error('mellon: metadata path %s : stat() call failed, %s', path, e) return - entity_id = self.load_entity_id(metadata, i) - if not entity_id: - logger.error('mellon: invalid metadata file retrieved from %s', path) - return - if 'ENTITY_ID' in idp and idp['ENTITY_ID'] != entity_id: - logger.error( - 'mellon: metadata path %s : entityID changed %r != %r', path, entity_id, idp['ENTITY_ID'] - ) - del idp['ENTITY_ID'] - idp['METADATA'] = metadata + should_load = metadata_last_update <= mtime + + if not should_load: + return + + try: + with open(path) as fd: + metadata = fd.read() + except OSError as e: + logger.error('mellon: metadata path %s : open()/read() call failed, %s', path, e) + return + entity_id = self.load_entity_id(metadata, i) + if not entity_id: + logger.error('mellon: invalid metadata file retrieved from %s', path) + return + + if 'ENTITY_ID' in idp and idp['ENTITY_ID'] != entity_id: + raise RuntimeError( + f'mellon: metadata path {path} : entityID changed {entity_id!r} != {idp["ENTITY_ID"]!r}' + ) + + idp['METADATA'] = metadata + idp['METADATA_LAST_UPDATE'] = now def load_metadata_url(self, idp, i): + now = time.time() url = idp['METADATA_URL'] metadata_cache_time = utils.get_setting(idp, 'METADATA_CACHE_TIME') timeout = utils.get_setting(idp, 'METADATA_HTTP_TIMEOUT') - warning = logger.warning - if 'METADATA' not in idp: - # if we have no metadata in cache, we must emit errors - warning = logger.error + metadata_last_update = idp.get('METADATA_LAST_UPDATE', None) + metadata_last_url_update = idp.get('METADATA_LAST_URL_CHECK', 0) try: hostname = urlparse(url).hostname except (ValueError, TypeError) as e: - warning('invalid METADATA_URL %r: %s', url, e) + logger.error('invalid METADATA_URL %r: %s', url, e) return if not hostname: - warning('no hostname in METADATA_URL %r: %s', url) + logger.error('no hostname in METADATA_URL %r: %s', url) return - last_update = idp.get('METADATA_URL_LAST_UPDATE', 0) - now = time.time() - try: url_fingerprint = hashlib.md5(url.encode('ascii')).hexdigest() file_cache_key = '%s_%s.xml' % (hostname, url_fingerprint) except (UnicodeError, TypeError, ValueError): - warning('unable to compute file_cache_key') + logger.error('unable to compute file_cache_key') return cache_directory = default_storage.path('mellon_metadata_cache') file_cache_path = os.path.join(cache_directory, file_cache_key) - if metadata_cache_time: - # METADATA_CACHE_TIME == 0 disable the file cache - if not os.path.exists(cache_directory): - os.makedirs(cache_directory) + if not os.path.exists(cache_directory): + os.makedirs(cache_directory) - if os.path.exists(file_cache_path) and 'METADATA' not in idp: - try: - with open(file_cache_path) as fd: - idp['METADATA'] = fd.read() - # use file cache mtime as last_update time, prevent too many loading from different workers - last_update = max(last_update, os.stat(file_cache_path).st_mtime) - except OSError: - warning('metadata url %s : error when loading the file cache %s', url, file_cache_path) + if os.path.exists(file_cache_path) and 'METADATA' not in idp: + try: + with open(file_cache_path) as fd: + metadata = fd.read() + # use file cache mtime as last_update time, prevent too many loading from different workers + except OSError: + logger.error('metadata url %s : error when loading the file cache %s', url, file_cache_path) + else: + idp['METADATA'] = metadata + metadata_last_update = idp['METADATA_LAST_UPDATE'] = os.stat(file_cache_path).st_mtime # fresh cache, skip loading - if last_update and 'METADATA' in idp and (now - last_update) < metadata_cache_time: + if ( + metadata_last_update + and 'METADATA' in idp + and ( + (now - metadata_last_update) < metadata_cache_time + # if HTTP GET is failing, try every 5 minutes + or (now - metadata_last_url_update) < 60 * 5 + ) + ): return def __http_get(): @@ -164,12 +181,12 @@ class DefaultAdapter: response = requests.get(url, verify=verify_ssl_certificate, timeout=timeout) response.raise_for_status() except requests.exceptions.RequestException as e: - warning('metadata url %s : HTTP request failed %s', url, e) + logger.warning('metadata url %s : HTTP request failed %s', url, e) return entity_id = self.load_entity_id(response.text, i) if not entity_id: - warning('invalid metadata file retrieved from %s', url) + logger.warning('invalid metadata file retrieved from %s', url) return if 'ENTITY_ID' in idp and idp['ENTITY_ID'] != entity_id: @@ -177,11 +194,13 @@ class DefaultAdapter: logger.error( 'mellon: metadata url %s, entityID changed %r != %r', url, entity_id, idp['ENTITY_ID'] ) - del idp['ENTITY_ID'] + return + + if idp.get('METADATA') != response.text: + idp['METADATA'] = response.text + logger.info('mellon: metadata url %s updated', url) + idp['METADATA_LAST_UPDATE'] = now - idp['METADATA'] = response.text - idp['METADATA_URL_LAST_UPDATE'] = now - if metadata_cache_time: try: with atomic_write(file_cache_path, mode='wb', overwrite=True) as fd: fd.write(response.text.encode('utf-8')) @@ -192,46 +211,38 @@ class DefaultAdapter: file_cache_path, e, ) - idp['METADATA_PATH'] = file_cache_path - # prevent reloading of the file cache immediately - idp['METADATA_PATH_LAST_UPDATE'] = time.time() + 1 - logger.info('mellon: metadata url %s updated', url) finally: # release thread object idp.pop('METADATA_URL_UPDATE_THREAD', None) - # emit an error if cache is too old - if metadata_cache_time: - stale_timeout = 24 * metadata_cache_time - if last_update and (now - idp['METADATA_URL_LAST_UPDATE']) > stale_timeout: - logger.error( - 'mellon: metadata url %s: not updated since %.1f hours', - url, - stale_timeout / 3600.0, - ) - # we have cache, update in background - if last_update and 'METADATA' in idp: + if metadata_last_update: + # we have cache, update in background + idp['METADATA_LAST_URL_CHECK'] = time.time() t = threading.Thread(target=__http_get) t.start() # store thread in idp for tests idp['METADATA_URL_UPDATE_THREAD'] = t - # suspend updates for HTTP timeout + 5 seconds - idp['METADATA_URL_LAST_UPDATE'] = last_update + timeout + 5 else: # synchronous update __http_get() def load_metadata(self, idp, i): - # legacy support + now = time.time() + if 'METADATA' in idp and idp['METADATA'].startswith('/'): + # support legacy configuration idp['METADATA_PATH'] = idp['METADATA'] del idp['METADATA'] - if 'METADATA_PATH' in idp: - self.load_metadata_path(idp, i) + metadata_cache_time = utils.get_setting(idp, 'METADATA_CACHE_TIME') + metadata_last_update = idp.get('METADATA_LAST_UPDATE', None) + age = metadata_last_update and (now - metadata_last_update) - if 'METADATA_URL' in idp: - self.load_metadata_url(idp, i) + if age is None or age > metadata_cache_time: + if 'METADATA_PATH' in idp: + self.load_metadata_path(idp, i) + elif 'METADATA_URL' in idp: + self.load_metadata_url(idp, i) if 'METADATA' in idp: if 'ENTITY_ID' not in idp: @@ -239,21 +250,28 @@ class DefaultAdapter: if entity_id: idp['ENTITY_ID'] = entity_id + if age and age > (24 * metadata_cache_time): + logger.error( + 'mellon: metadata for idp %s: not updated since %.1f hours', + idp.get('ENTITY_ID', i), + age / 3600.0, + ) + if 'ENTITY_ID' in idp: return idp['METADATA'] + logger.error('mellon: could not load metadata for %d-th idp: %s', i, idp) + return None + def load_entity_id(self, metadata, i): try: doc = ET.fromstring(metadata) except (TypeError, ET.ParseError): - logger.error('mellon: METADATA of %d-th idp is invalid', i) return None if doc.tag != '{%s}EntityDescriptor' % lasso.SAML2_METADATA_HREF: - logger.error('mellon: METADATA of %d-th idp has no EntityDescriptor root tag', i) return None if 'entityID' not in doc.attrib: - logger.error('mellon: METADATA of %d-th idp has no entityID attribute on its root tag', i) return None return doc.attrib['entityID'] diff --git a/tests/test_default_adapter.py b/tests/test_default_adapter.py index 6a1d479..cd34010 100644 --- a/tests/test_default_adapter.py +++ b/tests/test_default_adapter.py @@ -22,6 +22,7 @@ from unittest import mock import lasso import pytest +import responses from django.contrib import auth from django.db import connection @@ -381,10 +382,13 @@ def test_load_metadata_simple(adapter, metadata): assert adapter.load_metadata(idp, 0) == metadata -def test_load_metadata_legacy(adapter, metadata_path, metadata): +def test_load_metadata_legacy(adapter, metadata_path, metadata, freezer): + now = time.time() idp = {'METADATA': metadata_path} assert adapter.load_metadata(idp, 0) == metadata assert idp['METADATA'] == metadata + assert idp['METADATA_PATH'] == metadata_path + assert idp['METADATA_LAST_UPDATE'] == now def test_load_metadata_path(adapter, metadata_path, metadata, freezer): @@ -392,71 +396,74 @@ def test_load_metadata_path(adapter, metadata_path, metadata, freezer): idp = {'METADATA_PATH': str(metadata_path)} assert adapter.load_metadata(idp, 0) == metadata assert idp['METADATA'] == metadata - assert idp['METADATA_PATH_LAST_UPDATE'] == now + assert idp['METADATA_LAST_UPDATE'] == now -def test_load_metadata_url(settings, adapter, metadata, httpserver, freezer, caplog): - now = time.time() - httpserver.serve_content(content=metadata, headers={'Content-Type': 'application/xml'}) - idp = {'METADATA_URL': httpserver.url} +METADATA_URL = 'https://example.com/metadata' + + +@responses.activate +def test_load_metadata_url(settings, adapter, metadata, freezer, caplog): + idp = {'METADATA_URL': METADATA_URL} + + def wait_for_update_thread(): + # wait for update thread to finish + try: + idp['METADATA_URL_UPDATE_THREAD'].join() + except KeyError: + pass + + # test normal loading + responses.get(METADATA_URL, body=metadata, headers={'Content-Type': 'application/xml'}) assert adapter.load_metadata(idp, 0) == metadata + assert 'METADATA_URL_UPDATE_THREAD' not in idp assert idp['METADATA'] == metadata - assert idp['METADATA_URL_LAST_UPDATE'] == now - assert 'METADATA_PATH' in idp - assert idp['METADATA_PATH'].startswith(settings.MEDIA_ROOT) - with open(idp['METADATA_PATH']) as fd: - assert fd.read() == metadata - assert idp['METADATA_PATH_LAST_UPDATE'] == now + 1 - httpserver.serve_content( - content=metadata.replace('idp5', 'idp6'), headers={'Content-Type': 'application/xml'} - ) - assert adapter.load_metadata(idp, 0) == metadata + assert idp['METADATA_LAST_UPDATE'] == time.time() - freezer.move_to(datetime.timedelta(seconds=3601)) + # test ENTITY_ID change caplog.clear() + freezer.move_to(datetime.timedelta(seconds=3601)) + responses.replace( + responses.GET, + METADATA_URL, + body=metadata.replace('idp5', 'idp6'), + headers={'Content-Type': 'application/xml'}, + ) + assert adapter.load_metadata(idp, 0) == metadata - # wait for update thread to finish - try: - idp['METADATA_URL_UPDATE_THREAD'].join() - except KeyError: - pass - new_meta = adapter.load_metadata(idp, 0) - assert new_meta != metadata - assert new_meta == metadata.replace('idp5', 'idp6') + wait_for_update_thread() + assert 'WARNING' not in caplog.text assert any( 'entityID changed' in record.message and record.levelname == 'ERROR' for record in caplog.records ) + # test load from file cache - del idp['METADATA'] - del idp['METADATA_PATH'] - del idp['METADATA_PATH_LAST_UPDATE'] - httpserver.serve_content(content='', headers={'Content-Type': 'application/xml'}) - assert adapter.load_metadata(idp, 0) == metadata.replace('idp5', 'idp6') - - -def test_load_metadata_url_stale_timeout(settings, adapter, metadata, httpserver, freezer, caplog): - httpserver.serve_content(content=metadata, headers={'Content-Type': 'application/xml'}) - idp = {'METADATA_URL': httpserver.url} - assert adapter.load_metadata(idp, 0) == metadata - httpserver.serve_content(content='', headers={'Content-Type': 'application/xml'}) - assert adapter.load_metadata(idp, 0) == metadata - - freezer.move_to(datetime.timedelta(seconds=24 * 3600 - 1)) - assert adapter.load_metadata(idp, 0) == metadata - - # wait for update thread to finish - try: - idp['METADATA_URL_UPDATE_THREAD'].join() - except KeyError: - pass - assert caplog.records[-1].levelname == 'WARNING' - freezer.move_to(datetime.timedelta(seconds=3601)) + caplog.clear() + del idp['METADATA'] + request = responses.replace( + responses.GET, METADATA_URL, body='', headers={'Content-Type': 'application/xml'} + ) assert adapter.load_metadata(idp, 0) == metadata + wait_for_update_thread() + assert len(caplog.records) == 1 + assert caplog.records[0].levelname == 'WARNING' + assert 'invalid metadata' in caplog.records[0].message + assert request.call_count == 1 - # wait for update thread to finish - try: - idp['METADATA_URL_UPDATE_THREAD'].join() - except KeyError: - pass - assert caplog.records[-1].levelname == 'ERROR' + # test http get is not redone before 5 minutes + freezer.move_to(datetime.timedelta(seconds=4 * 60)) + assert adapter.load_metadata(idp, 0) == metadata + wait_for_update_thread() + assert request.call_count == 1 + freezer.move_to(datetime.timedelta(seconds=2 * 60)) + assert adapter.load_metadata(idp, 0) == metadata + wait_for_update_thread() + assert request.call_count == 2 + + # test error after 24hours + caplog.clear() + freezer.move_to(datetime.timedelta(seconds=3600 * 23)) + assert adapter.load_metadata(idp, 0) == metadata + assert 'ERROR' in caplog.text + assert 'not updated since 25' in caplog.text diff --git a/tox.ini b/tox.ini index 96ddce2..e36df85 100644 --- a/tox.ini +++ b/tox.ini @@ -25,13 +25,13 @@ deps = pytest-mock pytest-django pytest-freezegun - pytest-localserver pytz lxml cssselect django-webtest>1.9.3 WebTest pyquery + responses allowlist_externals = ./getlasso3.sh commands =