add prometheus-entrouvert-exporter
This commit is contained in:
parent
21dad6b7b1
commit
2ee70fe856
1
Makefile
1
Makefile
|
@ -5,6 +5,7 @@ NAME = publik-infra
|
|||
all:
|
||||
|
||||
DIST_FILES = \
|
||||
prometheus-entrouvert-exporter \
|
||||
sysadmin-doc
|
||||
|
||||
clean:
|
||||
|
|
|
@ -14,3 +14,16 @@ Package: publik-sysadmin-doc
|
|||
Architecture: all
|
||||
Depends: ${misc:Depends}
|
||||
Description: Documentation for doc-publik.entrouvert.com/guide-de-l-administrateur-systeme/
|
||||
|
||||
Package: prometheus-entrouvert-exporter
|
||||
Architecture: all
|
||||
Depends: ${misc:Depends},
|
||||
prometheus-node-exporter,
|
||||
python3-apt,
|
||||
python3-cryptography,
|
||||
python3-dbus,
|
||||
python3-git,
|
||||
python3-prometheus-client,
|
||||
python3-psutil,
|
||||
python3-systemd
|
||||
Description: useful metrics exporter as node exporter textfile
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
prometheus-entrouvert-exporter/prometheus-entrouvert-exporter.py usr/bin
|
|
@ -0,0 +1,6 @@
|
|||
[Unit]
|
||||
Description=Collect Entrouvert metrics
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/bin/bash -c "/usr/bin/prometheus-entrouvert-exporter.py > /var/lib/prometheus/node-exporter/entrouvert.prom"
|
|
@ -0,0 +1,9 @@
|
|||
[Unit]
|
||||
Description=Run entrouvert metrics collection every 15 minutes
|
||||
|
||||
[Timer]
|
||||
OnBootSec=0
|
||||
OnUnitActiveSec=15min
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
|
@ -0,0 +1,206 @@
|
|||
#!/usr/bin/python3
|
||||
import apt
|
||||
import argparse
|
||||
from cryptography import x509
|
||||
from cryptography.hazmat.backends import default_backend
|
||||
import datetime
|
||||
import dbus
|
||||
import mailbox
|
||||
from git import Repo
|
||||
import glob
|
||||
import os
|
||||
import psutil
|
||||
import requests
|
||||
import shlex
|
||||
import subprocess
|
||||
from systemd import login, journal
|
||||
import time
|
||||
|
||||
from prometheus_client import CollectorRegistry, Gauge
|
||||
from prometheus_client.exposition import generate_latest
|
||||
from prometheus_client.parser import text_string_to_metric_families
|
||||
|
||||
|
||||
# Handle on the local APT package cache, shared by the package-related checks.
apt_cache = apt.Cache()
# Dedicated registry: only the gauges declared below are rendered at the end
# of the script by generate_latest(registry).
registry = CollectorRegistry()
# Every gauge carries a "ctn" label, filled from the --ctn command-line
# argument (empty by default; run_in_machines passes the machine name).
# Incremented once for each check that raised an exception.
eo_errors = Gauge("eo_errors", "failed tests", ["ctn"], registry=registry)
# Days remaining before expiry, labelled with the certificate path.
eo_certificates = Gauge("eo_certificates", "certificates", ["ctn", "name"], registry=registry)
# Content of /etc/debian_version parsed as a float.
eo_debian = Gauge("eo_debian", "debian os", ["ctn"], registry=registry)
# name is "dirty" or "untracked" (state of the git repository in /etc).
eo_etckeeper = Gauge("eo_etckeeper", "etckeeper", ["ctn", "name"], registry=registry)
# name is "errors" or "deferred".
eo_exim = Gauge("eo_exim", "exim", ["ctn", "name"], registry=registry)
# name is "critical", "error" or "network_failure".
eo_journal = Gauge("eo_journal", "journald", ["ctn", "name"], registry=registry)
# Number of lines in /var/log/check-local-changes.log.
eo_local_changes = Gauge("eo_local_changes", "", ["ctn"], registry=registry)
# name is the file name of the mailbox under /var/spool/mail.
eo_mailboxes = Gauge("eo_mailboxes", "emails in local mailboxes", ["ctn", "name"], registry=registry)
# name is "errors".
eo_munin = Gauge("eo_munin", "munin", ["ctn", "name"], registry=registry)
# name is "connections".
eo_nginx = Gauge("eo_nginx", "nginx", ["ctn", "name"], registry=registry)
# role is "primary" or "secondary"; name is "replicators" or "replicating".
eo_postgresql = Gauge("eo_postgresql", "postgresql", ["ctn", "role", "name"], registry=registry)
eo_rabbitmq = Gauge("eo_rabbitmq", "rabbitmq", ["ctn"], registry=registry)
eo_threads = Gauge("eo_threads", "system threads", ["ctn"], registry=registry)
# NOTE: the Python variable is eo_units but the exported metric name is
# eo_failed_units; code that looks gauges up by exported name must not assume
# the two are identical.
eo_units = Gauge("eo_failed_units", "systemd failed units", ["ctn", "state"], registry=registry)
# state is "upgradable".
eo_packages = Gauge("eo_packages", "packages", ["ctn", "state"], registry=registry)
eo_sessions = Gauge("eo_sessions", "sessions", ["ctn"], registry=registry)
|
||||
|
||||
|
||||
def run(cmd):
    """Run *cmd* without a shell and return its standard output as text.

    The command line is split with ``shlex.split``. Every output line is
    decoded, stripped of trailing whitespace, and the lines are joined with
    a newline. Standard error is discarded and the exit status is not
    checked.
    """
    argv = shlex.split(cmd)
    # stderr goes to DEVNULL instead of an unread pipe: a chatty child would
    # otherwise fill the pipe buffer and block forever. The context manager
    # closes stdout and waits for the child, so no zombie is left behind.
    with subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) as proc:
        return "\n".join(line.decode().rstrip() for line in proc.stdout)
|
||||
|
||||
|
||||
def debian(ctn):
    """Export the content of /etc/debian_version as a float in eo_debian.

    Raises ValueError when the file content is not a plain number.
    """
    with open('/etc/debian_version') as version_file:
        raw_version = version_file.read()
    release = float(raw_version.strip())
    eo_debian.labels(ctn).set(release)
|
||||
|
||||
|
||||
def etckeeper(ctn):
    """Export the state of the git repository located in /etc.

    "dirty" is 1 when the repository reports itself dirty, else 0;
    "untracked" is the number of untracked files.
    """
    etc_repo = Repo("/etc")

    dirty_flag = int(etc_repo.is_dirty())
    eo_etckeeper.labels(ctn, "dirty").set(dirty_flag)

    untracked_count = len(etc_repo.untracked_files)
    eo_etckeeper.labels(ctn, "untracked").set(untracked_count)
|
||||
|
||||
|
||||
def exim(ctn):
    """Export exim counters: flagged log lines and the `exim -bpc` count.

    "errors" is incremented once per line of /var/log/exim4/mainlog that
    contains "**"; "deferred" is the integer printed by `exim -bpc`.
    """
    with open('/var/log/exim4/mainlog') as mainlog:
        entries = mainlog.readlines()
        for entry in entries:
            if "**" not in entry:
                continue
            eo_exim.labels(ctn, "errors").inc()

    queue_size = run("exim -bpc")
    eo_exim.labels(ctn, "deferred").set(int(queue_size))
|
||||
|
||||
|
||||
def certificates(ctn):
    """Export the number of whole days left before each certificate expires.

    Only certificates that exist on disk are reported; the gauge is labelled
    with the certificate path. The value is negative once the certificate
    has expired.
    """
    certs = ["/etc/exim4/exim.crt"]
    # not_valid_after is a naive datetime expressed in UTC, so it must be
    # compared with a naive UTC "now" rather than with local time.
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    for cert in certs:
        if not os.path.isfile(cert):
            continue
        # Read the PEM as bytes inside a context manager so the handle is
        # closed deterministically.
        with open(cert, "rb") as fh:
            pem = fh.read()
        c = x509.load_pem_x509_certificate(pem, default_backend())
        remaining = (c.not_valid_after - now).days
        eo_certificates.labels(ctn, cert).set(remaining)
|
||||
|
||||
|
||||
def journald(ctn):
    """Export counts of journal entries from the last fifteen minutes.

    "critical" is the number of entries returned once the PRIORITY=2 match
    is installed. "error" is incremented for every entry returned after the
    PRIORITY=3 match is added, and "network_failure" for those whose message
    contains one of two known substrings.
    """
    reader = journal.Reader()
    window_start = time.time() - 15 * 60

    reader.seek_realtime(window_start)
    reader.add_match(PRIORITY=2)
    critical_entries = list(reader)
    eo_journal.labels(ctn, "critical").set(len(critical_entries))

    # NOTE(review): the PRIORITY=2 match is still installed when PRIORITY=3
    # is added; confirm whether "error" is meant to include the critical
    # entries as well.
    reader.seek_realtime(window_start)
    reader.add_match(PRIORITY=3)
    for entry in reader:
        eo_journal.labels(ctn, "error").inc()
        message = entry["MESSAGE"]
        is_network_failure = "Connected -> NetworkFailure" in message or "task nfsd" in message
        if is_network_failure:
            eo_journal.labels(ctn, "network_failure").inc()
|
||||
|
||||
|
||||
def local_changes(ctn):
    """Export the number of lines in /var/log/check-local-changes.log.

    Nothing is exported when the log file does not exist.
    """
    f = "/var/log/check-local-changes.log"
    if not os.path.isfile(f):
        return
    # Count lines lazily inside a context manager: the handle is closed
    # deterministically and the file is never loaded in memory at once.
    with open(f) as fh:
        n = sum(1 for _ in fh)
    eo_local_changes.labels(ctn).set(n)
|
||||
|
||||
|
||||
def munin(ctn):
    """Export the number of error lines in the munin-node log.

    A line counts when it contains "rror" (which matches both "Error" and
    "error") and does not mention "mail_space". Nothing is exported when the
    log file does not exist.
    """
    f = "/var/log/munin/munin-node.log"
    if not os.path.isfile(f):
        return
    # Context manager so the log handle is closed deterministically.
    with open(f) as fh:
        n = sum(1 for line in fh if "rror" in line and "mail_space" not in line)
    eo_munin.labels(ctn, "errors").set(n)
|
||||
|
||||
|
||||
def nginx(ctn):
    """Export the active connection count from the local nginx status page.

    Fetches http://localhost/nginx_status and parses the
    "Active connections: N" line. Nothing is exported when the request does
    not succeed.
    """
    # A timeout keeps an unresponsive server from stalling the whole run.
    r = requests.get("http://localhost/nginx_status", timeout=10)
    if not r.ok:
        return
    # Iterate over lines: iterating over r.text directly yields single
    # characters, which can never contain the marker.
    for line in r.text.splitlines():
        if "Active connections" in line:
            n = int(line.split(':')[1].strip())
            eo_nginx.labels(ctn, "connections").set(n)
|
||||
|
||||
|
||||
def packages(ctn):
    """Export the number of installed packages that have an upgrade available."""
    # Walk the whole cache: get_changes() only lists packages that were
    # explicitly marked for a change, and nothing is marked here, so it
    # cannot be used to find upgradable packages.
    n = sum(1 for pkg in apt_cache if pkg.is_upgradable)
    eo_packages.labels(ctn, "upgradable").set(n)
|
||||
|
||||
|
||||
def mailboxes(ctn):
    """Export the number of messages held in each mbox under /var/spool/mail.

    The gauge is labelled with the mailbox file name.
    """
    for path in glob.glob("/var/spool/mail/*"):
        box_name = path.split("/")[-1]
        message_count = len(mailbox.mbox(path))
        eo_mailboxes.labels(ctn, box_name).set(message_count)
|
||||
|
||||
|
||||
def _count_postgres_processes(marker):
    """Count running processes whose first command-line item contains *marker*."""
    count = 0
    for proc in psutil.process_iter():
        try:
            cmd = proc.cmdline()
        except psutil.Error:
            # The process exited or cannot be inspected; skip it instead of
            # aborting the whole check.
            continue
        if cmd and marker in cmd[0]:
            count += 1
    return count


def postgresql(ctn):
    """Export PostgreSQL replication process counts.

    A standby marker file under /var/lib/postgresql/*/*/ selects the
    "secondary" role, for which the number of walreceiver processes is
    exported as "replicating". Otherwise the role is "primary" and the number
    of walsender processes is exported as "replicators".
    """
    # NOTE(review): this tests that the package is known to the APT cache;
    # confirm whether "installed" was the intended condition.
    if "postgresql" not in apt_cache:
        return
    # recovery.conf marks a standby up to PostgreSQL 11; PostgreSQL 12 and
    # later use standby.signal in the data directory instead.
    standby_markers = (
        glob.glob("/var/lib/postgresql/*/*/recovery.conf")
        + glob.glob("/var/lib/postgresql/*/*/standby.signal")
    )
    if standby_markers:
        role, name, marker = "secondary", "replicating", "walreceiver"
    else:
        role, name, marker = "primary", "replicators", "walsender"
    eo_postgresql.labels(ctn, role, name).set(_count_postgres_processes(marker))
|
||||
|
||||
|
||||
def rabbitmq(ctn):
    """Export the total number of messages held in all RabbitMQ queues.

    Runs `rabbitmqctl list_queues messages` and sums every output line that
    consists only of digits; other lines are ignored. Nothing is exported
    when rabbitmqctl is not present. When it is present the gauge is always
    set, to 0 if no queue depth is reported.
    """
    rabbitmqctl = "/usr/sbin/rabbitmqctl"
    if not os.path.isfile(rabbitmqctl):
        return
    output = run("%s list_queues messages" % rabbitmqctl)
    # Split into lines first: iterating over the string itself yields single
    # characters, which would add up digits instead of queue depths.
    total = sum(int(line) for line in output.splitlines() if line.strip().isdigit())
    eo_rabbitmq.labels(ctn).set(total)
|
||||
|
||||
|
||||
def sessions(ctn):
    """Export the number of entries returned by psutil.users()."""
    logged_in = psutil.users()
    eo_sessions.labels(ctn).set(len(logged_in))
|
||||
|
||||
|
||||
def threads(ctn):
    """Export the total number of threads across all running processes."""
    total = 0
    for proc in psutil.process_iter():
        try:
            total += proc.num_threads()
        except psutil.Error:
            # The process exited or cannot be inspected between listing and
            # query; skip it instead of failing the whole check.
            continue
    eo_threads.labels(ctn).set(total)
|
||||
|
||||
|
||||
def units(ctn):
    """Export the number of systemd units reported as "failed".

    Units are listed through the systemd manager on the system D-Bus; a unit
    counts when field 3 of its ListUnits entry equals "failed".
    """
    system_bus = dbus.SystemBus()
    systemd_object = system_bus.get_object("org.freedesktop.systemd1", "/org/freedesktop/systemd1")
    manager = dbus.Interface(systemd_object, "org.freedesktop.systemd1.Manager")

    failed_count = 0
    for unit in manager.ListUnits():
        if unit[3] == "failed":
            failed_count += 1
    eo_units.labels(ctn, "failed").set(failed_count)
|
||||
|
||||
|
||||
def run_in_machines(ctn):
    """Collect metrics from every registered machine and merge them locally.

    For each name returned by ``login.machine_names()`` the exporter is run
    inside the machine through ``systemd-run --machine`` with ``--ctn`` set
    to the machine name. Its text output is parsed and every sample is copied
    into the local gauge that exports the same metric name. Metric families
    without a matching local gauge are ignored.

    *ctn* is accepted for signature parity with the other checks and is not
    used.
    """
    # Index gauges by exported metric name. Looking the name up in globals()
    # is not enough because a variable name may differ from the metric name
    # (eo_units exports "eo_failed_units"), which used to raise KeyError and
    # drop every remaining metric and machine.
    gauges = {}
    for candidate in list(globals().values()):
        if isinstance(candidate, Gauge):
            for described in candidate.describe():
                gauges[described.name] = candidate

    for machine in login.machine_names():
        # Quote the name so it survives the shlex.split done by run().
        quoted = shlex.quote(machine)
        output = run(
            "systemd-run --wait --pipe -q --machine %s "
            "/usr/bin/prometheus-entrouvert-exporter.py --ctn %s" % (quoted, quoted)
        )
        for family in text_string_to_metric_families(output):
            gauge = gauges.get(family.name)
            if gauge is None:
                continue
            for sample in family.samples:
                gauge.labels(**sample.labels).set(sample.value)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command line: --test re-raises the first failing check instead of only
    # counting it; --ctn sets the value of the "ctn" label on every metric.
    cli = argparse.ArgumentParser()
    cli.add_argument("--test", action="store_true", help="raise errors")
    cli.add_argument("--ctn", default="")
    options = cli.parse_args()

    # Checks run in this order; run_in_machines comes last.
    checks = (
        certificates,
        debian,
        etckeeper,
        exim,
        journald,
        local_changes,
        mailboxes,
        munin,
        nginx,
        packages,
        postgresql,
        rabbitmq,
        sessions,
        threads,
        units,
        run_in_machines,
    )
    for check in checks:
        try:
            check(options.ctn)
        except Exception:
            # A failing check must not prevent the others from reporting:
            # count it and carry on unless --test was given.
            eo_errors.labels(ctn=options.ctn).inc()
            if options.test:
                raise

    # Render the registry in the Prometheus text format on standard output.
    rendered = generate_latest(registry)
    print(rendered.decode())
|
Loading…
Reference in New Issue