misc: optimize expired logs cleaning (#88953)
gitea/passerelle/pipeline/head This commit looks good Details

This commit is contained in:
Benjamin Dauvergne 2024-04-02 12:03:35 +02:00
parent 2a0e702a05
commit 2d7e4cddd9
4 changed files with 103 additions and 15 deletions

View File

@ -20,6 +20,7 @@ import traceback
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from passerelle.base.models import ResourceLog
from passerelle.views import get_all_apps
@ -52,6 +53,10 @@ class Command(BaseCommand):
if frequency not in ('every5min', 'hourly', 'daily', 'weekly', 'monthly', 'availability', 'jobs'):
raise CommandError('unknown frequency')
errors = []
if frequency == 'daily':
ResourceLog.daily()
for app in get_all_apps():
for connector in app.objects.all():
if options.get('connector') and connector.get_connector_slug() != options.get('connector'):

View File

@ -1,7 +1,9 @@
import base64
import collections
import collections.abc
import copy
import datetime
import functools
import inspect
import itertools
import logging
@ -493,15 +495,6 @@ class BaseResource(models.Model):
instance.set_log_level(d['log_level'])
return instance
def clean_logs(self):
    """Delete this connector's log entries that are past their retention period.

    The retention period is the connector's own ``log_retention_days`` when it
    is set to a truthy value, otherwise ``settings.LOG_RETENTION_DAYS``.
    """
    now = timezone.now()
    retention_days = self.logging_parameters.log_retention_days or settings.LOG_RETENTION_DAYS
    cutoff = now - datetime.timedelta(days=retention_days)
    # Only rows logged by this very connector (same app and same slug) are targeted.
    expired = ResourceLog.objects.filter(
        appname=self.get_connector_slug(),
        slug=self.slug,
        timestamp__lt=cutoff,
    )
    expired.delete()
def check_status(self):
# should raise an exception if status is not ok
raise NotImplementedError
@ -599,7 +592,7 @@ class BaseResource(models.Model):
pass
def daily(self):
    """Per-connector hook for daily maintenance work."""
    # NOTE(review): this text comes from a diff rendering that lost its +/-
    # markers; the `self.clean_logs()` call and the bare `pass` below look like
    # the removed and the added line of the same hunk - confirm which one is
    # meant to remain.
    self.clean_logs()
    pass
def weekly(self):
pass
@ -933,6 +926,60 @@ class ResourceLog(models.Model):
return v
return level_name
@classmethod
def daily(cls):
    """Run the daily log maintenance: purge expired rows, then vacuum the table."""
    cls.delete_expired_logs()
    if connection.in_atomic_block:
        # VACUUM ANALYZE cannot be run inside a transaction; bail out so that
        # tests calling 'cron daily' from inside a test transaction don't break.
        return
    cls.vacuum_analyze()
@classmethod
def vacuum_analyze(cls):
    """Issue a ``VACUUM ANALYZE`` statement on this model's database table."""
    with connection.cursor() as cursor:
        # The table name is taken from the model metadata, not from user input.
        table = cls._meta.db_table
        statement = f'VACUUM ANALYZE {table}'
        cursor.execute(statement)
@classmethod
def delete_expired_logs(cls):
    """Delete log rows older than the retention delay of their connector.

    Connectors are grouped by retention delay (their own
    ``log_retention_days`` when truthy, else ``settings.LOG_RETENTION_DAYS``):

    * no connector at all: nothing is deleted;
    * a single delay in use: one DELETE removes every row older than that
      delay, without any filter on the connector;
    * several delays: one DELETE per delay, longest delay first, restricted
      to the connectors using that delay.
    """
    connectors_by_delay = collections.defaultdict(set)
    known_connectors = set()
    for app in get_all_apps():
        for connector in app.objects.all():
            retention = connector.logging_parameters.log_retention_days or settings.LOG_RETENTION_DAYS
            key = (connector.get_connector_slug(), connector.slug)
            connectors_by_delay[retention].add(key)
            known_connectors.add(key)

    if not connectors_by_delay:
        return

    if len(connectors_by_delay) == 1:
        # Simple case: every connector shares the same delay.
        (retention,) = connectors_by_delay
        cutoff = timezone.now() - datetime.timedelta(days=retention)
        cls.objects.filter(timestamp__lt=cutoff).delete()
        return

    for retention in sorted(connectors_by_delay, reverse=True):
        cutoff = timezone.now() - datetime.timedelta(days=retention)
        selected = connectors_by_delay[retention]
        # Keep the (appname, slug) condition as short as possible: when this
        # group holds at least half of the connectors, list the others and
        # negate the condition instead of listing the group itself.
        negate = 2 * len(selected) >= len(known_connectors)
        if negate:
            selected = known_connectors - selected
        conditions = [Q(appname=app_slug, slug=slug) for app_slug, slug in selected]
        app_filter = functools.reduce(Q.__or__, conditions)
        expired = cls.objects.filter(timestamp__lt=cutoff)
        # In negated form the query matches every expired row that does not
        # belong to one of the listed connectors.
        expired = expired.exclude(app_filter) if negate else expired.filter(app_filter)
        expired.delete()
def __str__(self):
return '%s %s %s %s' % (self.timestamp, self.levelno, self.appname, self.slug)

View File

@ -4,6 +4,9 @@ from unittest.mock import patch
import pytest
from django.contrib.contenttypes.models import ContentType
from django.core.management import call_command
from django.db import connection
from django.test.utils import CaptureQueriesContext
from django.urls import reverse
from django.utils import timezone
from requests.exceptions import ReadTimeout
@ -46,10 +49,10 @@ def test_log_cleaning(app, db, admin_user, settings):
assert ResourceLog.objects.all().count() == 3
settings.LOG_RETENTION_DAYS = 11
connector.daily()
call_command('cron', 'daily')
assert ResourceLog.objects.all().count() == 3
settings.LOG_RETENTION_DAYS = 10
connector.daily()
call_command('cron', 'daily')
assert ResourceLog.objects.all().count() == 1
ResourceLog.objects.all().delete()
@ -72,17 +75,49 @@ def test_log_cleaning(app, db, admin_user, settings):
assert not resp.html.find('input', {'name': 'log_retention_days'}).has_attr('value')
resp.form['log_retention_days'] = '11'
resp.form.submit()
connector.daily()
call_command('cron', 'daily')
assert ResourceLog.objects.all().count() == 3
resp = app.get(url)
assert int(resp.html.find('input', {'name': 'log_retention_days'})['value']) == 11
resp.form['log_retention_days'] = '10'
resp.form.submit()
connector.daily()
call_command('cron', 'daily')
assert ResourceLog.objects.all().count() == 1
def test_log_cleaning_multiple_delay(transactional_db, freezer):
    """Check the daily cron cleanup when connectors use different retention delays."""
    connectors = [
        OpenGIS.objects.create(slug=f'plop{i}', wms_service_url='http://www.example.net') for i in range(5)
    ]

    # plop0 and plop1 get an explicit delay of 10 days, the other three keep
    # the default retention delay.
    for connector in connectors[:2]:
        parameters = connector.logging_parameters
        parameters.log_retention_days = 10
        parameters.save()

    # two log lines per connector
    for connector in connectors:
        connector.logger.error('hello1')
        connector.logger.error('hello2')

    # VACUUM ANALYZE must be issued exactly once by the daily cron
    with CaptureQueriesContext(connection) as context:
        call_command('cron', 'daily')
    captured_queries = str(list(context))
    assert captured_queries.count('VACUUM ANALYZE base_resourcelog') == 1
    assert ResourceLog.objects.all().count() == 10

    # jump a bit more than one week into the future and clean again
    freezer.tick(datetime.timedelta(days=7.1))
    call_command('cron', 'daily')

    # the 6 lines of plop2, plop3 and plop4 are gone...
    assert ResourceLog.objects.all().count() == 4
    # ...and only the lines of plop0 and plop1 remain
    remaining_slugs = set(ResourceLog.objects.values_list('slug', flat=True))
    assert remaining_slugs == {'plop0', 'plop1'}
@pytest.fixture
def email_handler():
import logging

View File

@ -23,6 +23,7 @@ from unittest import mock
import pytest
import responses
from django.core.management import call_command
from django.db import transaction
from django.utils.dateparse import parse_date
from django.utils.timezone import now
@ -610,7 +611,7 @@ def test_clean_logs_daily(con, app, settings, freezer):
assert settings.LOG_RETENTION_DAYS == 7
freezer.move_to('2023-09-09 01:25:00')
con.daily()
call_command('cron', 'daily')
assert ResourceLog.objects.filter(timestamp__lte=datetime.datetime(2023, 9, 5, 1, 25)).count() == 0