postgresql: add monitoring of archiver failures (#66070)
This commit is contained in:
parent
5a77f636af
commit
82718a9d33
|
@ -190,24 +190,29 @@ def postgresql(ctn):
|
|||
version = cluster["version"]
|
||||
name = cluster["cluster"]
|
||||
eo_postgresql.labels(ctn, version, name, "", "running").set(cluster["running"])
|
||||
if "recovery" in cluster and cluster["recovery"] and cluster["running"]:
|
||||
# we are on a standby, check it's connected to a master
|
||||
receiver_statuses = run("sudo -u postgres psql -p %s -tAc 'select status from pg_stat_wal_receiver ;'" % cluster["port"])
|
||||
eo_postgresql.labels(ctn, version, name, "", "replicating").set(0)
|
||||
for status in receiver_statuses.splitlines():
|
||||
if status == "streaming":
|
||||
eo_postgresql.labels(ctn, version, name, "", "replicating").inc()
|
||||
elif cluster["running"]:
|
||||
# we are on a primary... check the slots are good
|
||||
slots = run("sudo -u postgres psql -p %s -tAc 'select active, slot_name, pg_wal_lsn_diff(pg_current_wal_lsn() , restart_lsn) from pg_replication_slots where active;'" % cluster["port"])
|
||||
active_slot_count = 0
|
||||
for slot in slots.splitlines():
|
||||
active, slot_name, delta = slot.split("|")
|
||||
eo_postgresql.labels(ctn, version, name, slot_name, "slot-active").set(active == 't')
|
||||
eo_postgresql.labels(ctn, version, name, slot_name, "slot-delta").set(int(delta))
|
||||
if active == 't':
|
||||
active_slot_count += 1
|
||||
eo_postgresql.labels(ctn, version, name, "", "active-slot-count").set(active_slot_count)
|
||||
if cluster["running"]:
|
||||
# check the archiver status
|
||||
archiver_failures = run("sudo -u postgres psql -p %s -tAc 'select failed_count from pg_stat_archiver;'" % cluster["port"])
|
||||
eo_postgresql.labels(ctn, version, name, "", "archive_failed").set(int(archiver_failures))
|
||||
|
||||
if "recovery" in cluster and cluster["recovery"]:
|
||||
# we are on a standby, check it's connected to a master
|
||||
receiver_statuses = run("sudo -u postgres psql -p %s -tAc 'select status from pg_stat_wal_receiver ;'" % cluster["port"])
|
||||
eo_postgresql.labels(ctn, version, name, "", "replicating").set(0)
|
||||
for status in receiver_statuses.splitlines():
|
||||
if status == "streaming":
|
||||
eo_postgresql.labels(ctn, version, name, "", "replicating").inc()
|
||||
else:
|
||||
# we are on a primary... check the slots are good
|
||||
slots = run("sudo -u postgres psql -p %s -tAc 'select active, slot_name, pg_wal_lsn_diff(pg_current_wal_lsn() , restart_lsn) from pg_replication_slots where active;'" % cluster["port"])
|
||||
active_slot_count = 0
|
||||
for slot in slots.splitlines():
|
||||
active, slot_name, delta = slot.split("|")
|
||||
eo_postgresql.labels(ctn, version, name, slot_name, "slot-active").set(active == 't')
|
||||
eo_postgresql.labels(ctn, version, name, slot_name, "slot-delta").set(int(delta))
|
||||
if active == 't':
|
||||
active_slot_count += 1
|
||||
eo_postgresql.labels(ctn, version, name, "", "active-slot-count").set(active_slot_count)
|
||||
|
||||
def rabbitmq(ctn):
|
||||
rabbitmqctl = "/usr/sbin/rabbitmqctl"
|
||||
|
|
Loading…
Reference in New Issue