postgresql: redo monitoring with PG13 compatibility (#65244)
This commit is contained in:
parent
761d1c468c
commit
5a77f636af
|
@ -8,6 +8,7 @@ import dbus
|
|||
import mailbox
|
||||
from git import Repo
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
import psutil
|
||||
import requests
|
||||
|
@ -46,7 +47,7 @@ eo_local_changes = Gauge("eo_local_changes", "", ["ctn"], registry=registry)
|
|||
eo_mailboxes = Gauge("eo_mailboxes", "emails in local mailboxes", ["ctn", "name"], registry=registry)
|
||||
eo_munin = Gauge("eo_munin", "munin", ["ctn", "name"], registry=registry)
|
||||
eo_nginx = Gauge("eo_nginx", "nginx", ["ctn", "name"], registry=registry)
|
||||
eo_postgresql = Gauge("eo_postgresql", "postgresql", ["ctn", "role", "name"], registry=registry)
|
||||
eo_postgresql = Gauge("eo_postgresql", "postgresql", ["ctn", "version", "cluster", "context", "name"], registry=registry)
|
||||
eo_rabbitmq = Gauge("eo_rabbitmq", "rabbitmq", ["ctn"], registry=registry)
|
||||
eo_threads = Gauge("eo_threads", "system threads", ["ctn"], registry=registry)
|
||||
eo_units = Gauge("eo_units", "systemd units", ["ctn", "name", "state"], registry=registry)
|
||||
|
@ -181,35 +182,32 @@ def mailboxes(ctn):
|
|||
|
||||
|
||||
def postgresql(ctn):
    """Export per-cluster PostgreSQL metrics on the ``eo_postgresql`` gauge.

    Does nothing when ``/usr/bin/pg_lsclusters`` is not installed.
    Otherwise every cluster listed by ``pg_lsclusters --json`` gets a
    ``running`` sample, and running clusters additionally get:

    * cluster reported as in recovery: ``replicating``, the number of
      ``pg_stat_wal_receiver`` rows whose status is ``streaming``;
    * any other running cluster: ``slot-active`` and ``slot-delta`` for each
      replication slot returned by the query (labelled with the slot name in
      the ``context`` label), plus ``active-slot-count``.

    Args:
        ctn: value used for the ``ctn`` label of every sample.
    """
    if not os.path.exists("/usr/bin/pg_lsclusters"):
        return

    clusters = json.loads(run("pg_lsclusters --json"))
    for cluster in clusters:
        version = cluster["version"]
        name = cluster["cluster"]
        running = cluster["running"]
        eo_postgresql.labels(ctn, version, name, "", "running").set(running)
        if not running:
            # psql cannot connect to a stopped cluster; nothing more to report.
            continue

        if cluster.get("recovery"):
            # we are on a standby, check it's connected to a master
            receiver_statuses = run("sudo -u postgres psql -p %s -tAc 'select status from pg_stat_wal_receiver ;'" % cluster["port"])
            streaming = sum(
                1
                for status in receiver_statuses.splitlines()
                if status.strip() == "streaming"
            )
            eo_postgresql.labels(ctn, version, name, "", "replicating").set(streaming)
        else:
            # we are on a primary... check the slots are good
            # NOTE(review): the query filters on "where active", so inactive
            # slots never produce a sample and "slot-active" is always 1 for
            # the rows returned here -- confirm this is intended.
            slots = run("sudo -u postgres psql -p %s -tAc 'select active, slot_name, pg_wal_lsn_diff(pg_current_wal_lsn() , restart_lsn) from pg_replication_slots where active;'" % cluster["port"])
            active_slot_count = 0
            for slot in slots.splitlines():
                fields = slot.split("|")
                if len(fields) != 3:
                    # Blank or unexpected output line: skip it instead of
                    # failing the whole collection on tuple unpacking.
                    continue
                active, slot_name, delta = fields
                is_active = active == "t"
                eo_postgresql.labels(ctn, version, name, slot_name, "slot-active").set(is_active)
                if delta:
                    # psql prints an empty field when restart_lsn is NULL;
                    # int("") would raise ValueError, so only export a delta
                    # when one was actually returned.
                    eo_postgresql.labels(ctn, version, name, slot_name, "slot-delta").set(int(delta))
                if is_active:
                    active_slot_count += 1
            eo_postgresql.labels(ctn, version, name, "", "active-slot-count").set(active_slot_count)
|
||||
|
||||
def rabbitmq(ctn):
|
||||
rabbitmqctl = "/usr/sbin/rabbitmqctl"
|
||||
|
|
Loading…
Reference in New Issue