adapters: do not log errors on cold cache (#84933)

Only log errors if the cache is older than 24 hours.
Benjamin Dauvergne 2023-12-19 17:36:11 +01:00
parent 200e009b1e
commit af81da4954
3 changed files with 157 additions and 132 deletions
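
In short: while cached metadata is still reasonably fresh, a failed refresh only produces a warning; it is escalated to an error once the cache is older than 24 times METADATA_CACHE_TIME (24 hours with the default one-hour cache time). A minimal sketch of that policy, assuming an idp settings dict carrying a METADATA_LAST_UPDATE timestamp; the helper name and its signature are illustrative only, not the adapter's actual API:

import logging
import time

logger = logging.getLogger('mellon')

def report_refresh_failure(idp, url, metadata_cache_time=3600):
    # Hypothetical helper: escalate a failed metadata refresh to an error
    # only once the cached copy is older than 24 x METADATA_CACHE_TIME,
    # otherwise keep serving the cache and just warn.
    now = time.time()
    last_update = idp.get('METADATA_LAST_UPDATE')
    age = last_update and (now - last_update)
    if age and age > 24 * metadata_cache_time:
        logger.error('mellon: metadata url %s: not updated since %.1f hours', url, age / 3600.0)
    else:
        logger.warning('mellon: metadata url %s: refresh failed, keeping cached copy', url)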

View File

@@ -78,83 +78,100 @@ class DefaultAdapter:
yield idp
def load_metadata_path(self, idp, i):
now = time.time()
path = idp['METADATA_PATH']
if not os.path.exists(path):
logger.warning('mellon: metadata path %s does not exist', path)
logger.error('mellon: metadata path %s does not exist', path)
return
last_update = idp.get('METADATA_PATH_LAST_UPDATE', 0)
try:
mtime = os.stat(path).st_mtime
except OSError as e:
logger.warning('mellon: metadata path %s : stat() call failed, %s', path, e)
return
if last_update == 0 or mtime >= last_update:
idp['METADATA_PATH_LAST_UPDATE'] = time.time()
metadata_last_update = idp.get('METADATA_LAST_UPDATE', None)
if not idp.get('METADATA'):
should_load = True
elif not metadata_last_update:
should_load = True
else:
try:
with open(path) as fd:
metadata = fd.read()
mtime = os.stat(path).st_mtime
except OSError as e:
logger.warning('mellon: metadata path %s : open()/read() call failed, %s', path, e)
logger.error('mellon: metadata path %s : stat() call failed, %s', path, e)
return
entity_id = self.load_entity_id(metadata, i)
if not entity_id:
logger.error('mellon: invalid metadata file retrieved from %s', path)
return
if 'ENTITY_ID' in idp and idp['ENTITY_ID'] != entity_id:
logger.error(
'mellon: metadata path %s : entityID changed %r != %r', path, entity_id, idp['ENTITY_ID']
)
del idp['ENTITY_ID']
idp['METADATA'] = metadata
should_load = metadata_last_update <= mtime
if not should_load:
return
try:
with open(path) as fd:
metadata = fd.read()
except OSError as e:
logger.error('mellon: metadata path %s : open()/read() call failed, %s', path, e)
return
entity_id = self.load_entity_id(metadata, i)
if not entity_id:
logger.error('mellon: invalid metadata file retrieved from %s', path)
return
if 'ENTITY_ID' in idp and idp['ENTITY_ID'] != entity_id:
raise RuntimeError(
f'mellon: metadata path {path} : entityID changed {entity_id!r} != {idp["ENTITY_ID"]!r}'
)
idp['METADATA'] = metadata
idp['METADATA_LAST_UPDATE'] = now
def load_metadata_url(self, idp, i):
now = time.time()
url = idp['METADATA_URL']
metadata_cache_time = utils.get_setting(idp, 'METADATA_CACHE_TIME')
timeout = utils.get_setting(idp, 'METADATA_HTTP_TIMEOUT')
warning = logger.warning
if 'METADATA' not in idp:
# if we have no metadata in cache, we must emit errors
warning = logger.error
metadata_last_update = idp.get('METADATA_LAST_UPDATE', None)
metadata_last_url_update = idp.get('METADATA_LAST_URL_CHECK', 0)
try:
hostname = urlparse(url).hostname
except (ValueError, TypeError) as e:
warning('invalid METADATA_URL %r: %s', url, e)
logger.error('invalid METADATA_URL %r: %s', url, e)
return
if not hostname:
warning('no hostname in METADATA_URL %r', url)
logger.error('no hostname in METADATA_URL %r', url)
return
last_update = idp.get('METADATA_URL_LAST_UPDATE', 0)
now = time.time()
try:
url_fingerprint = hashlib.md5(url.encode('ascii')).hexdigest()
file_cache_key = '%s_%s.xml' % (hostname, url_fingerprint)
except (UnicodeError, TypeError, ValueError):
warning('unable to compute file_cache_key')
logger.error('unable to compute file_cache_key')
return
cache_directory = default_storage.path('mellon_metadata_cache')
file_cache_path = os.path.join(cache_directory, file_cache_key)
if metadata_cache_time:
# METADATA_CACHE_TIME == 0 disables the file cache
if not os.path.exists(cache_directory):
os.makedirs(cache_directory)
if not os.path.exists(cache_directory):
os.makedirs(cache_directory)
if os.path.exists(file_cache_path) and 'METADATA' not in idp:
try:
with open(file_cache_path) as fd:
idp['METADATA'] = fd.read()
# use file cache mtime as last_update time, prevent too many loading from different workers
last_update = max(last_update, os.stat(file_cache_path).st_mtime)
except OSError:
warning('metadata url %s : error when loading the file cache %s', url, file_cache_path)
if os.path.exists(file_cache_path) and 'METADATA' not in idp:
try:
with open(file_cache_path) as fd:
metadata = fd.read()
# use file cache mtime as last_update time, prevent too many loading from different workers
except OSError:
logger.error('metadata url %s : error when loading the file cache %s', url, file_cache_path)
else:
idp['METADATA'] = metadata
metadata_last_update = idp['METADATA_LAST_UPDATE'] = os.stat(file_cache_path).st_mtime
# fresh cache, skip loading
if last_update and 'METADATA' in idp and (now - last_update) < metadata_cache_time:
if (
metadata_last_update
and 'METADATA' in idp
and (
(now - metadata_last_update) < metadata_cache_time
# if HTTP GET is failing, try every 5 minutes
or (now - metadata_last_url_update) < 60 * 5
)
):
return
def __http_get():
@@ -164,12 +181,12 @@ class DefaultAdapter:
response = requests.get(url, verify=verify_ssl_certificate, timeout=timeout)
response.raise_for_status()
except requests.exceptions.RequestException as e:
warning('metadata url %s : HTTP request failed %s', url, e)
logger.warning('metadata url %s : HTTP request failed %s', url, e)
return
entity_id = self.load_entity_id(response.text, i)
if not entity_id:
warning('invalid metadata file retrieved from %s', url)
logger.warning('invalid metadata file retrieved from %s', url)
return
if 'ENTITY_ID' in idp and idp['ENTITY_ID'] != entity_id:
@@ -177,11 +194,13 @@ class DefaultAdapter:
logger.error(
'mellon: metadata url %s, entityID changed %r != %r', url, entity_id, idp['ENTITY_ID']
)
del idp['ENTITY_ID']
return
if idp.get('METADATA') != response.text:
idp['METADATA'] = response.text
logger.info('mellon: metadata url %s updated', url)
idp['METADATA_LAST_UPDATE'] = now
idp['METADATA'] = response.text
idp['METADATA_URL_LAST_UPDATE'] = now
if metadata_cache_time:
try:
with atomic_write(file_cache_path, mode='wb', overwrite=True) as fd:
fd.write(response.text.encode('utf-8'))
@@ -192,46 +211,38 @@ class DefaultAdapter:
file_cache_path,
e,
)
idp['METADATA_PATH'] = file_cache_path
# prevent reloading of the file cache immediately
idp['METADATA_PATH_LAST_UPDATE'] = time.time() + 1
logger.info('mellon: metadata url %s updated', url)
finally:
# release thread object
idp.pop('METADATA_URL_UPDATE_THREAD', None)
# emit an error if cache is too old
if metadata_cache_time:
stale_timeout = 24 * metadata_cache_time
if last_update and (now - idp['METADATA_URL_LAST_UPDATE']) > stale_timeout:
logger.error(
'mellon: metadata url %s: not updated since %.1f hours',
url,
stale_timeout / 3600.0,
)
# we have cache, update in background
if last_update and 'METADATA' in idp:
if metadata_last_update:
# we have cache, update in background
idp['METADATA_LAST_URL_CHECK'] = time.time()
t = threading.Thread(target=__http_get)
t.start()
# store thread in idp for tests
idp['METADATA_URL_UPDATE_THREAD'] = t
# suspend updates for HTTP timeout + 5 seconds
idp['METADATA_URL_LAST_UPDATE'] = last_update + timeout + 5
else:
# synchronous update
__http_get()
def load_metadata(self, idp, i):
# legacy support
now = time.time()
if 'METADATA' in idp and idp['METADATA'].startswith('/'):
# support legacy configuration
idp['METADATA_PATH'] = idp['METADATA']
del idp['METADATA']
if 'METADATA_PATH' in idp:
self.load_metadata_path(idp, i)
metadata_cache_time = utils.get_setting(idp, 'METADATA_CACHE_TIME')
metadata_last_update = idp.get('METADATA_LAST_UPDATE', None)
age = metadata_last_update and (now - metadata_last_update)
if 'METADATA_URL' in idp:
self.load_metadata_url(idp, i)
if age is None or age > metadata_cache_time:
if 'METADATA_PATH' in idp:
self.load_metadata_path(idp, i)
elif 'METADATA_URL' in idp:
self.load_metadata_url(idp, i)
if 'METADATA' in idp:
if 'ENTITY_ID' not in idp:
@@ -239,21 +250,28 @@ class DefaultAdapter:
if entity_id:
idp['ENTITY_ID'] = entity_id
if age and age > (24 * metadata_cache_time):
logger.error(
'mellon: metadata for idp %s: not updated since %.1f hours',
idp.get('ENTITY_ID', i),
age / 3600.0,
)
if 'ENTITY_ID' in idp:
return idp['METADATA']
logger.error('mellon: could not load metadata for %d-th idp: %s', i, idp)
return None
def load_entity_id(self, metadata, i):
try:
doc = ET.fromstring(metadata)
except (TypeError, ET.ParseError):
logger.error('mellon: METADATA of %d-th idp is invalid', i)
return None
if doc.tag != '{%s}EntityDescriptor' % lasso.SAML2_METADATA_HREF:
logger.error('mellon: METADATA of %d-th idp has no EntityDescriptor root tag', i)
return None
if 'entityID' not in doc.attrib:
logger.error('mellon: METADATA of %d-th idp has no entityID attribute on its root tag', i)
return None
return doc.attrib['entityID']

View File

@@ -22,6 +22,7 @@ from unittest import mock
import lasso
import pytest
import responses
from django.contrib import auth
from django.db import connection
@@ -381,10 +382,13 @@ def test_load_metadata_simple(adapter, metadata):
assert adapter.load_metadata(idp, 0) == metadata
def test_load_metadata_legacy(adapter, metadata_path, metadata):
def test_load_metadata_legacy(adapter, metadata_path, metadata, freezer):
now = time.time()
idp = {'METADATA': metadata_path}
assert adapter.load_metadata(idp, 0) == metadata
assert idp['METADATA'] == metadata
assert idp['METADATA_PATH'] == metadata_path
assert idp['METADATA_LAST_UPDATE'] == now
def test_load_metadata_path(adapter, metadata_path, metadata, freezer):
@@ -392,71 +396,74 @@ def test_load_metadata_path(adapter, metadata_path, metadata, freezer):
idp = {'METADATA_PATH': str(metadata_path)}
assert adapter.load_metadata(idp, 0) == metadata
assert idp['METADATA'] == metadata
assert idp['METADATA_PATH_LAST_UPDATE'] == now
assert idp['METADATA_LAST_UPDATE'] == now
def test_load_metadata_url(settings, adapter, metadata, httpserver, freezer, caplog):
now = time.time()
httpserver.serve_content(content=metadata, headers={'Content-Type': 'application/xml'})
idp = {'METADATA_URL': httpserver.url}
METADATA_URL = 'https://example.com/metadata'
@responses.activate
def test_load_metadata_url(settings, adapter, metadata, freezer, caplog):
idp = {'METADATA_URL': METADATA_URL}
def wait_for_update_thread():
# wait for update thread to finish
try:
idp['METADATA_URL_UPDATE_THREAD'].join()
except KeyError:
pass
# test normal loading
responses.get(METADATA_URL, body=metadata, headers={'Content-Type': 'application/xml'})
assert adapter.load_metadata(idp, 0) == metadata
assert 'METADATA_URL_UPDATE_THREAD' not in idp
assert idp['METADATA'] == metadata
assert idp['METADATA_URL_LAST_UPDATE'] == now
assert 'METADATA_PATH' in idp
assert idp['METADATA_PATH'].startswith(settings.MEDIA_ROOT)
with open(idp['METADATA_PATH']) as fd:
assert fd.read() == metadata
assert idp['METADATA_PATH_LAST_UPDATE'] == now + 1
httpserver.serve_content(
content=metadata.replace('idp5', 'idp6'), headers={'Content-Type': 'application/xml'}
)
assert adapter.load_metadata(idp, 0) == metadata
assert idp['METADATA_LAST_UPDATE'] == time.time()
freezer.move_to(datetime.timedelta(seconds=3601))
# test ENTITY_ID change
caplog.clear()
freezer.move_to(datetime.timedelta(seconds=3601))
responses.replace(
responses.GET,
METADATA_URL,
body=metadata.replace('idp5', 'idp6'),
headers={'Content-Type': 'application/xml'},
)
assert adapter.load_metadata(idp, 0) == metadata
# wait for update thread to finish
try:
idp['METADATA_URL_UPDATE_THREAD'].join()
except KeyError:
pass
new_meta = adapter.load_metadata(idp, 0)
assert new_meta != metadata
assert new_meta == metadata.replace('idp5', 'idp6')
wait_for_update_thread()
assert 'WARNING' not in caplog.text
assert any(
'entityID changed' in record.message and record.levelname == 'ERROR' for record in caplog.records
)
# test load from file cache
del idp['METADATA']
del idp['METADATA_PATH']
del idp['METADATA_PATH_LAST_UPDATE']
httpserver.serve_content(content='', headers={'Content-Type': 'application/xml'})
assert adapter.load_metadata(idp, 0) == metadata.replace('idp5', 'idp6')
def test_load_metadata_url_stale_timeout(settings, adapter, metadata, httpserver, freezer, caplog):
httpserver.serve_content(content=metadata, headers={'Content-Type': 'application/xml'})
idp = {'METADATA_URL': httpserver.url}
assert adapter.load_metadata(idp, 0) == metadata
httpserver.serve_content(content='', headers={'Content-Type': 'application/xml'})
assert adapter.load_metadata(idp, 0) == metadata
freezer.move_to(datetime.timedelta(seconds=24 * 3600 - 1))
assert adapter.load_metadata(idp, 0) == metadata
# wait for update thread to finish
try:
idp['METADATA_URL_UPDATE_THREAD'].join()
except KeyError:
pass
assert caplog.records[-1].levelname == 'WARNING'
freezer.move_to(datetime.timedelta(seconds=3601))
caplog.clear()
del idp['METADATA']
request = responses.replace(
responses.GET, METADATA_URL, body='', headers={'Content-Type': 'application/xml'}
)
assert adapter.load_metadata(idp, 0) == metadata
wait_for_update_thread()
assert len(caplog.records) == 1
assert caplog.records[0].levelname == 'WARNING'
assert 'invalid metadata' in caplog.records[0].message
assert request.call_count == 1
# wait for update thread to finish
try:
idp['METADATA_URL_UPDATE_THREAD'].join()
except KeyError:
pass
assert caplog.records[-1].levelname == 'ERROR'
# test http get is not redone before 5 minutes
freezer.move_to(datetime.timedelta(seconds=4 * 60))
assert adapter.load_metadata(idp, 0) == metadata
wait_for_update_thread()
assert request.call_count == 1
freezer.move_to(datetime.timedelta(seconds=2 * 60))
assert adapter.load_metadata(idp, 0) == metadata
wait_for_update_thread()
assert request.call_count == 2
# test error after 24hours
caplog.clear()
freezer.move_to(datetime.timedelta(seconds=3600 * 23))
assert adapter.load_metadata(idp, 0) == metadata
assert 'ERROR' in caplog.text
assert 'not updated since 25' in caplog.text

View File

@@ -25,13 +25,13 @@ deps =
pytest-mock
pytest-django
pytest-freezegun
pytest-localserver
pytz
lxml
cssselect
django-webtest>1.9.3
WebTest
pyquery
responses
allowlist_externals =
./getlasso3.sh
commands =
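
The dependency swap above (pytest-localserver out, responses in) reflects the updated tests, which no longer run a live HTTP server and instead stub the metadata URL. A minimal, self-contained sketch of that mocking pattern; the URL and payload are placeholders, not values used by the real test suite:

import requests
import responses

@responses.activate
def test_metadata_fetch_is_mocked():
    # register a canned reply for the metadata URL instead of serving it live
    responses.get(
        'https://example.com/metadata',
        body='<EntityDescriptor/>',
        headers={'Content-Type': 'application/xml'},
    )
    assert requests.get('https://example.com/metadata', timeout=10).text == '<EntityDescriptor/>'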