add prometheus-entrouvert-exporter

This commit is contained in:
Christophe Siraut 2020-10-27 10:28:32 +01:00
parent 21dad6b7b1
commit 2ee70fe856
6 changed files with 236 additions and 0 deletions

View File

@ -5,6 +5,7 @@ NAME = publik-infra
all:
DIST_FILES = \
prometheus-entrouvert-exporter \
sysadmin-doc
clean:

13
debian/control vendored
View File

@ -14,3 +14,16 @@ Package: publik-sysadmin-doc
Architecture: all
Depends: ${misc:Depends}
Description: Documentation for doc-publik.entrouvert.com/guide-de-l-administrateur-systeme/
Package: prometheus-entrouvert-exporter
Architecture: all
Depends: ${misc:Depends},
prometheus-node-exporter,
python3-apt,
python3-cryptography,
python3-dbus,
python3-git,
python3-prometheus-client,
python3-psutil,
python3-systemd
Description: useful metrics exporter as node exporter textfile

View File

@ -0,0 +1 @@
prometheus-entrouvert-exporter/prometheus-entrouvert-exporter.py usr/bin

View File

@ -0,0 +1,6 @@
[Unit]
Description=Collect Entrouvert metrics
[Service]
Type=oneshot
ExecStart=/bin/bash -c "/usr/bin/prometheus-entrouvert-exporter.py > /var/lib/prometheus/node-exporter/entrouvert.prom"

View File

@ -0,0 +1,9 @@
[Unit]
Description=Run entrouvert metrics collection every 15 minutes
[Timer]
OnBootSec=0
OnUnitActiveSec=15min
[Install]
WantedBy=timers.target

View File

@ -0,0 +1,206 @@
#!/usr/bin/python3
import apt
import argparse
from cryptography import x509
from cryptography.hazmat.backends import default_backend
import datetime
import dbus
import mailbox
from git import Repo
import glob
import os
import psutil
import requests
import shlex
import subprocess
from systemd import login, journal
import time
from prometheus_client import CollectorRegistry, Gauge
from prometheus_client.exposition import generate_latest
from prometheus_client.parser import text_string_to_metric_families
# APT package cache, opened once at import time; used by packages() and postgresql().
apt_cache = apt.Cache()
# Registry holding every gauge declared below; it is what gets serialized
# with generate_latest() at the end of the script.
registry = CollectorRegistry()
# All gauges carry a "ctn" label, filled from the --ctn command-line argument
# (run_in_machines() passes the machine name there; the default is "").
# Incremented by the main loop each time a check raises an exception.
eo_errors = Gauge("eo_errors", "failed tests", ["ctn"], registry=registry)
eo_certificates = Gauge("eo_certificates", "certificates", ["ctn", "name"], registry=registry)
eo_debian = Gauge("eo_debian", "debian os", ["ctn"], registry=registry)
eo_etckeeper = Gauge("eo_etckeeper", "etckeeper", ["ctn", "name"], registry=registry)
eo_exim = Gauge("eo_exim", "exim", ["ctn", "name"], registry=registry)
eo_journal = Gauge("eo_journal", "journald", ["ctn", "name"], registry=registry)
# NOTE(review): the help text of this gauge is empty.
eo_local_changes = Gauge("eo_local_changes", "", ["ctn"], registry=registry)
eo_mailboxes = Gauge("eo_mailboxes", "emails in local mailboxes", ["ctn", "name"], registry=registry)
eo_munin = Gauge("eo_munin", "munin", ["ctn", "name"], registry=registry)
eo_nginx = Gauge("eo_nginx", "nginx", ["ctn", "name"], registry=registry)
# "role" is set to "primary" or "secondary" by postgresql().
eo_postgresql = Gauge("eo_postgresql", "postgresql", ["ctn", "role", "name"], registry=registry)
eo_rabbitmq = Gauge("eo_rabbitmq", "rabbitmq", ["ctn"], registry=registry)
eo_threads = Gauge("eo_threads", "system threads", ["ctn"], registry=registry)
# NOTE: the Python variable is eo_units but the exported metric name is
# "eo_failed_units"; any lookup of gauges by metric name must account for that.
eo_units = Gauge("eo_failed_units", "systemd failed units", ["ctn", "state"], registry=registry)
eo_packages = Gauge("eo_packages", "packages", ["ctn", "state"], registry=registry)
eo_sessions = Gauge("eo_sessions", "sessions", ["ctn"], registry=registry)
def run(cmd):
    """Run *cmd* without a shell and return its standard output as text.

    The command string is tokenized with ``shlex.split``. Standard error is
    captured and discarded, and the exit status is not checked.

    Returns the decoded stdout with trailing whitespace removed from every
    line, lines joined by ``"\\n"`` and no trailing newline.
    """
    argv = shlex.split(cmd)
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # communicate() drains stdout and stderr together and waits for the child.
    # Reading only stdout would block forever once the child fills the stderr
    # pipe, and would leave the finished child unreaped.
    out, _ = proc.communicate()
    text = out.decode()
    # Drop the single final newline so splitting does not create an extra
    # empty last line.
    if text.endswith("\n"):
        text = text[:-1]
    return "\n".join(line.rstrip() for line in text.split("\n"))
def debian(ctn):
    """Export the content of /etc/debian_version, parsed as a float.

    A non-numeric content makes ``float`` raise ValueError.
    """
    with open('/etc/debian_version') as version_file:
        raw_version = version_file.read()
        release = float(raw_version.strip())
    eo_debian.labels(ctn).set(release)
def etckeeper(ctn):
    """Export the state of the git repository located at /etc.

    Sets "dirty" to 1 or 0 from ``Repo.is_dirty()`` and "untracked" to the
    number of entries in ``Repo.untracked_files``.
    """
    etc_repo = Repo("/etc")
    dirty_gauge = eo_etckeeper.labels(ctn, "dirty")
    dirty_gauge.set(int(etc_repo.is_dirty()))
    untracked_gauge = eo_etckeeper.labels(ctn, "untracked")
    untracked_gauge.set(len(etc_repo.untracked_files))
def exim(ctn):
    """Export exim delivery errors and the size of the mail queue.

    "errors" is the number of lines of /var/log/exim4/mainlog containing
    ``**``; it is always exported, with value 0 when no line matches.
    "deferred" is the integer printed by ``exim -bpc``.

    Raises OSError if the log cannot be opened and ValueError if
    ``exim -bpc`` does not print an integer.
    """
    # errors="replace": one undecodable byte in the log must not abort the
    # whole check. The file is streamed line by line, not loaded in memory.
    with open('/var/log/exim4/mainlog', errors="replace") as log:
        errors = sum(1 for line in log if "**" in line)
    eo_exim.labels(ctn, "errors").set(errors)
    deferred = int(run("exim -bpc"))
    eo_exim.labels(ctn, "deferred").set(deferred)
def certificates(ctn):
    """Export the number of whole days left before each certificate expires.

    Paths in the built-in list that are not regular files are skipped. The
    gauge is labelled with the certificate path.
    """
    certs = ["/etc/exim4/exim.crt"]
    # not_valid_after is a naive datetime expressed in UTC, so it must be
    # compared with a naive UTC "now" rather than local time.
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    for cert in certs:
        if not os.path.isfile(cert):
            continue
        # Read bytes directly: the PEM loader wants bytes, and the context
        # manager closes the handle.
        with open(cert, "rb") as fh:
            pem = fh.read()
        parsed = x509.load_pem_x509_certificate(pem, default_backend())
        remaining = (parsed.not_valid_after - now).days
        eo_certificates.labels(ctn, cert).set(remaining)
def journald(ctn):
    """Export counts of journal entries from the last 15 minutes.

    "critical" counts entries with PRIORITY=2, "error" entries with
    PRIORITY=3, and "network_failure" the PRIORITY=3 entries whose message
    contains "Connected -> NetworkFailure" or "task nfsd". All three series
    are always exported, with value 0 when nothing matches.
    """
    reader = journal.Reader()
    since = time.time() - 15 * 60

    reader.add_match(PRIORITY=2)
    reader.seek_realtime(since)
    eo_journal.labels(ctn, "critical").set(sum(1 for _ in reader))

    # Matches on the same field are ORed together, so the PRIORITY=2 match
    # has to be dropped first; otherwise "error" would also count the
    # critical entries.
    reader.flush_matches()
    reader.add_match(PRIORITY=3)
    reader.seek_realtime(since)
    errors = 0
    network_failures = 0
    for entry in reader:
        errors += 1
        msg = entry.get("MESSAGE", "")
        # MESSAGE may come back as bytes; decode leniently so the substring
        # tests below cannot raise TypeError.
        if isinstance(msg, bytes):
            msg = msg.decode(errors="replace")
        if "Connected -> NetworkFailure" in msg or "task nfsd" in msg:
            network_failures += 1
    eo_journal.labels(ctn, "error").set(errors)
    eo_journal.labels(ctn, "network_failure").set(network_failures)
def local_changes(ctn):
    """Export the number of lines in /var/log/check-local-changes.log.

    Nothing is exported when the file does not exist.
    """
    f = "/var/log/check-local-changes.log"
    if os.path.isfile(f):
        # The context manager closes the handle; lines are counted while
        # streaming instead of loading the whole file.
        with open(f) as fh:
            n = sum(1 for _ in fh)
        eo_local_changes.labels(ctn).set(n)
def munin(ctn):
    """Export the number of error lines in /var/log/munin/munin-node.log.

    A line counts when it contains "rror" and does not contain
    "mail_space". Nothing is exported when the file does not exist.
    """
    f = "/var/log/munin/munin-node.log"
    if os.path.isfile(f):
        # The context manager closes the handle; the log is streamed.
        with open(f) as fh:
            n = sum(1 for line in fh if "rror" in line and "mail_space" not in line)
        eo_munin.labels(ctn, "errors").set(n)
def nginx(ctn):
    """Export the number of active nginx connections.

    Fetches http://localhost/nginx_status and parses the integer after the
    colon on the "Active connections" line. Nothing is exported when the
    response status is not OK or no such line is found.
    """
    # Without a timeout, a hung web server would block this run forever.
    r = requests.get("http://localhost/nginx_status", timeout=10)
    if not r.ok:
        return
    # Iterate over lines: iterating r.text directly yields single
    # characters, which can never contain "Active connections".
    for line in r.text.splitlines():
        if "Active connections" in line:
            n = int(line.split(':')[1].strip())
            eo_nginx.labels(ctn, "connections").set(n)
            break
def packages(ctn):
    """Export the number of packages in the APT cache that can be upgraded."""
    # Scan the whole cache. get_changes() only lists packages already marked
    # for a change, which is none on a freshly opened cache, and the
    # attribute is is_upgradable (isUpgradable is the legacy spelling).
    n = sum(1 for pkg in apt_cache if pkg.is_upgradable)
    eo_packages.labels(ctn, "upgradable").set(n)
def mailboxes(ctn):
    """Export the message count of every entry matching /var/spool/mail/*.

    Each entry is opened as an mbox; the gauge is labelled with the last
    path component.
    """
    for spool_path in glob.glob("/var/spool/mail/*"):
        box_name = spool_path.rsplit("/", 1)[-1]
        message_count = len(mailbox.mbox(spool_path))
        eo_mailboxes.labels(ctn, box_name).set(message_count)
def _count_processes_matching(marker):
    """Count running processes whose first command-line item contains *marker*."""
    count = 0
    for proc in psutil.process_iter():
        try:
            cmd = proc.cmdline()
        except psutil.Error:
            # The process exited or became unreadable during the scan.
            continue
        if cmd and marker in cmd[0]:
            count += 1
    return count


def postgresql(ctn):
    """Export PostgreSQL replication process counts.

    A cluster with a ``recovery.conf`` (PostgreSQL <= 11) or a
    ``standby.signal`` (PostgreSQL >= 12) file under
    /var/lib/postgresql/*/*/ is reported with role "secondary" and the
    number of "walreceiver" processes as "replicating". Otherwise the role
    is "primary" and the number of "walsender" processes is exported as
    "replicators".
    """
    # NOTE(review): membership in apt.Cache means the package is known to
    # APT, not necessarily installed - confirm this is the intended guard.
    if "postgresql" not in apt_cache:
        return
    standby_markers = (
        glob.glob("/var/lib/postgresql/*/*/recovery.conf")
        + glob.glob("/var/lib/postgresql/*/*/standby.signal")
    )
    if standby_markers:
        role, name, marker = "secondary", "replicating", "walreceiver"
    else:
        role, name, marker = "primary", "replicators", "walsender"
    eo_postgresql.labels(ctn, role, name).set(_count_processes_matching(marker))
def rabbitmq(ctn):
    """Export the total number of messages across all RabbitMQ queues.

    Sums every all-digit line printed by ``rabbitmqctl list_queues messages``;
    other lines are ignored. Nothing is exported when /usr/sbin/rabbitmqctl
    does not exist; otherwise the gauge is always set (0 when no queue is
    listed).
    """
    rabbitmqctl = "/usr/sbin/rabbitmqctl"
    if not os.path.isfile(rabbitmqctl):
        return
    output = run("%s list_queues messages" % rabbitmqctl)
    # run() returns one string, so it must be split into lines: iterating it
    # directly yields characters and would add up the digits ("12" -> 3).
    total = sum(
        int(line) for line in output.splitlines() if line.strip().isdigit()
    )
    eo_rabbitmq.labels(ctn).set(total)
def sessions(ctn):
    """Export the number of entries returned by ``psutil.users()``."""
    logged_in = psutil.users()
    session_count = len(logged_in)
    eo_sessions.labels(ctn).set(session_count)
def threads(ctn):
    """Export the total number of threads over all running processes."""
    total = 0
    for proc in psutil.process_iter():
        try:
            total += proc.num_threads()
        except psutil.Error:
            # The process exited or became unreadable during the scan; skip
            # it instead of aborting with a partial total.
            continue
    eo_threads.labels(ctn).set(total)
def units(ctn):
    """Export the number of failed systemd units.

    Calls ``ListUnits()`` on the systemd manager over the system D-Bus and
    counts the entries whose item at index 3 equals "failed".
    """
    system_bus = dbus.SystemBus()
    systemd_object = system_bus.get_object("org.freedesktop.systemd1", "/org/freedesktop/systemd1")
    manager = dbus.Interface(systemd_object, "org.freedesktop.systemd1.Manager")
    failed_count = 0
    # Index 3 is presumably the unit's active state - confirm against the
    # ListUnits() signature.
    for unit in manager.ListUnits():
        if unit[3] == "failed":
            failed_count += 1
    eo_units.labels(ctn, "failed").set(failed_count)
def run_in_machines(ctn):
    """Run the exporter inside every local machine and merge its metrics.

    For each name returned by ``login.machine_names()``, the exporter is run
    in that machine through ``systemd-run`` with ``--ctn <machine>``. Its
    text output is parsed and every sample is copied onto the gauge of the
    same metric name in this process.

    *ctn* is unused; it is accepted so that this function has the same
    signature as the other checks.
    """
    # Index the gauges by exported metric name. The Python variable name may
    # differ from it, so a plain globals()[name] lookup raises KeyError for
    # such metrics.
    gauges = {}
    for obj in list(globals().values()):
        if isinstance(obj, Gauge):
            gauges[obj.describe()[0].name] = obj

    for machine in login.machine_names():
        # Quote the name: run() tokenizes the command with shlex.split().
        quoted = shlex.quote(machine)
        output = run(
            "systemd-run --wait --pipe -q --machine %s /usr/bin/prometheus-entrouvert-exporter.py --ctn %s"
            % (quoted, quoted)
        )
        for family in text_string_to_metric_families(output):
            gauge = gauges.get(family.name)
            if gauge is None:
                # Metric not declared in this process: ignore it rather than
                # lose the remaining metrics and machines.
                continue
            for sample in family.samples:
                gauge.labels(**sample.labels).set(sample.value)
if __name__ == "__main__":
    # Options: --test re-raises the first exception of a check instead of
    # only counting it; --ctn is the value of the "ctn" label on every metric.
    cli = argparse.ArgumentParser()
    cli.add_argument("--test", action="store_true", help="raise errors")
    cli.add_argument("--ctn", default="")
    args = cli.parse_args()

    # Checks run in this order; run_in_machines goes last.
    checks = (
        certificates, debian, etckeeper, exim, journald, local_changes,
        mailboxes, munin, nginx, packages, postgresql, rabbitmq, sessions,
        threads, units, run_in_machines,
    )
    for check in checks:
        try:
            check(args.ctn)
        except Exception:
            # A failing check is counted in eo_errors and does not stop the
            # remaining checks, unless --test was given.
            eo_errors.labels(ctn=args.ctn).inc()
            if args.test:
                raise

    # Write the whole registry to stdout in the Prometheus text format.
    print(generate_latest(registry).decode())