postgresql: redo monitoring with PG13 compatibility (#65244)
This commit is contained in:
parent
761d1c468c
commit
5a77f636af
|
@@ -8,6 +8,7 @@ import dbus
|
||||||
import mailbox
|
import mailbox
|
||||||
from git import Repo
|
from git import Repo
|
||||||
import glob
|
import glob
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
import psutil
|
import psutil
|
||||||
import requests
|
import requests
|
||||||
|
@@ -46,7 +47,7 @@ eo_local_changes = Gauge("eo_local_changes", "", ["ctn"], registry=registry)
|
||||||
eo_mailboxes = Gauge("eo_mailboxes", "emails in local mailboxes", ["ctn", "name"], registry=registry)
|
eo_mailboxes = Gauge("eo_mailboxes", "emails in local mailboxes", ["ctn", "name"], registry=registry)
|
||||||
eo_munin = Gauge("eo_munin", "munin", ["ctn", "name"], registry=registry)
|
eo_munin = Gauge("eo_munin", "munin", ["ctn", "name"], registry=registry)
|
||||||
eo_nginx = Gauge("eo_nginx", "nginx", ["ctn", "name"], registry=registry)
|
eo_nginx = Gauge("eo_nginx", "nginx", ["ctn", "name"], registry=registry)
|
||||||
eo_postgresql = Gauge("eo_postgresql", "postgresql", ["ctn", "role", "name"], registry=registry)
|
eo_postgresql = Gauge("eo_postgresql", "postgresql", ["ctn", "version", "cluster", "context", "name"], registry=registry)
|
||||||
eo_rabbitmq = Gauge("eo_rabbitmq", "rabbitmq", ["ctn"], registry=registry)
|
eo_rabbitmq = Gauge("eo_rabbitmq", "rabbitmq", ["ctn"], registry=registry)
|
||||||
eo_threads = Gauge("eo_threads", "system threads", ["ctn"], registry=registry)
|
eo_threads = Gauge("eo_threads", "system threads", ["ctn"], registry=registry)
|
||||||
eo_units = Gauge("eo_units", "systemd units", ["ctn", "name", "state"], registry=registry)
|
eo_units = Gauge("eo_units", "systemd units", ["ctn", "name", "state"], registry=registry)
|
||||||
|
@@ -181,35 +182,32 @@ def mailboxes(ctn):
|
||||||
|
|
||||||
|
|
||||||
def postgresql(ctn):
|
def postgresql(ctn):
|
||||||
if not glob.glob("/etc/postgresql/*/main/postgresql.conf"):
|
if not os.path.exists("/usr/bin/pg_lsclusters"):
|
||||||
return
|
return
|
||||||
|
|
||||||
def get_last_backup_delta():
|
clusters = json.loads(run("pg_lsclusters --json"))
|
||||||
backup_files = glob.glob('/var/lib/postgresql/backups/base/*')
|
for cluster in clusters:
|
||||||
if not backup_files:
|
version = cluster["version"]
|
||||||
return -1
|
name = cluster["cluster"]
|
||||||
sorted_backup_files = sorted(backup_files, key=os.path.getmtime)
|
eo_postgresql.labels(ctn, version, name, "", "running").set(cluster["running"])
|
||||||
created = os.stat(sorted_backup_files[-1]).st_ctime
|
if "recovery" in cluster and cluster["recovery"] and cluster["running"]:
|
||||||
return (datetime.datetime.now() - datetime.datetime.fromtimestamp(created)).total_seconds()
|
# we are on a standby, check it's connected to a master
|
||||||
|
receiver_statuses = run("sudo -u postgres psql -p %s -tAc 'select status from pg_stat_wal_receiver ;'" % cluster["port"])
|
||||||
recovery = glob.glob("/var/lib/postgresql/*/*/recovery.conf")
|
eo_postgresql.labels(ctn, version, name, "", "replicating").set(0)
|
||||||
if len(recovery) == 0:
|
for status in receiver_statuses.splitlines():
|
||||||
role = "primary"
|
if status == "streaming":
|
||||||
|
eo_postgresql.labels(ctn, version, name, "", "replicating").inc()
|
||||||
eo_postgresql.labels(ctn, role, "backup_delta").set(get_last_backup_delta())
|
elif cluster["running"]:
|
||||||
eo_postgresql.labels(ctn, role, "replicators").set(0)
|
# we are on a primary... check the slots are good
|
||||||
for p in psutil.process_iter():
|
slots = run("sudo -u postgres psql -p %s -tAc 'select active, slot_name, pg_wal_lsn_diff(pg_current_wal_lsn() , restart_lsn) from pg_replication_slots where active;'" % cluster["port"])
|
||||||
cmd = p.cmdline()
|
active_slot_count = 0
|
||||||
if cmd and ('walsender' in cmd[0] or 'wal sender' in cmd[0]):
|
for slot in slots.splitlines():
|
||||||
eo_postgresql.labels(ctn, role, "replicators").inc()
|
active, slot_name, delta = slot.split("|")
|
||||||
else:
|
eo_postgresql.labels(ctn, version, name, slot_name, "slot-active").set(active == 't')
|
||||||
role = "secondary"
|
eo_postgresql.labels(ctn, version, name, slot_name, "slot-delta").set(int(delta))
|
||||||
eo_postgresql.labels(ctn, role, "replicating").set(0)
|
if active == 't':
|
||||||
for p in psutil.process_iter():
|
active_slot_count += 1
|
||||||
cmd = p.cmdline()
|
eo_postgresql.labels(ctn, version, name, "", "active-slot-count").set(active_slot_count)
|
||||||
if cmd and 'walreceiver' in cmd[0]:
|
|
||||||
eo_postgresql.labels(ctn, role, "replicating").inc()
|
|
||||||
|
|
||||||
|
|
||||||
def rabbitmq(ctn):
|
def rabbitmq(ctn):
|
||||||
rabbitmqctl = "/usr/sbin/rabbitmqctl"
|
rabbitmqctl = "/usr/sbin/rabbitmqctl"
|
||||||
|
|
Loading…
Reference in New Issue