prometheus: ignore dovecot lost LDAP connection errors

This commit is contained in:
Frédéric Péters 2021-06-30 08:30:27 +02:00
parent bdb1e5fecf
commit 96765164c8
1 changed file with 15 additions and 1 deletion

View File

@ -21,6 +21,13 @@ from prometheus_client.exposition import generate_latest
from prometheus_client.parser import text_string_to_metric_families
# Journald error messages that must not be counted as errors, grouped by the
# systemd unit emitting them.  Keys are looked up with the journal entry's
# _SYSTEMD_UNIT field; each listed string is matched as a substring of the
# entry's MESSAGE.
JOURNALD_IGNORED_ERRORS = {
    'dovecot.service': ['Connection lost to LDAP server, reconnecting'],
}
# Dedicated collector registry that the metrics declared in this module are
# attached to.
registry = CollectorRegistry()
# Gauge "eo_errors" ("failed tests"), with one time series per "ctn" label.
eo_errors = Gauge(
    name="eo_errors",
    documentation="failed tests",
    labelnames=["ctn"],
    registry=registry,
)
# Module-wide handle on the APT package cache.
apt_cache = apt.Cache()
@ -90,8 +97,15 @@ def journald(ctn):
j.seek_realtime(fifteen)
j.add_match(PRIORITY=3)
for e in j:
eo_journal.labels(ctn, "error").inc()
msg = e["MESSAGE"]
ignored_message = False
for ignored_string in JOURNALD_IGNORED_ERRORS.get(e.get('_SYSTEMD_UNIT')) or []:
if ignored_string in msg:
ignored_message = True
break
if ignored_message:
continue
eo_journal.labels(ctn, "error").inc()
if "Connected -> NetworkFailure" in msg or "task nfsd" in msg:
eo_journal.labels(ctn, "network_failure").inc()