WIP: improve statistics performance (#69606) #15

Draft
pducroquet wants to merge 4 commits from wip/69606-statistics-performance into main
3 changed files with 34 additions and 19 deletions

View File

@@ -30,7 +30,7 @@ from django.contrib.postgres.fields import ArrayField
from django.contrib.postgres.fields.jsonb import KeyTextTransform
from django.core.exceptions import ObjectDoesNotExist
from django.db import models
from django.db.models import Count, F, Q, QuerySet, Value
from django.db.models import Count, F, Max, Min, Q, QuerySet, Value
from django.db.models.functions import Trunc
from django.utils.timezone import now, utc
from django.utils.translation import gettext_lazy as _
@@ -143,6 +143,14 @@ class EventTypeDefinition(metaclass=EventTypeDefinitionMeta):
if end:
qs = qs.filter(timestamp__lte=end)
if which_references is not None:
qs = qs.which_references(which_references)
if users_ou:
qs = qs.filter(user__ou=users_ou)
x_interval_qs = qs
values = [group_by_time]
if group_by_time != 'timestamp':
qs = qs.annotate(
@@ -154,15 +162,16 @@ class EventTypeDefinition(metaclass=EventTypeDefinitionMeta):
qs = qs.annotate(**{group_by_field: KeyTextTransform(group_by_field, 'data')})
values.append(group_by_field)
if which_references is not None:
qs = qs.which_references(which_references)
if users_ou:
qs = qs.filter(user__ou=users_ou)
qs = qs.values(*values)
qs = qs.annotate(count=Count('id'))
return qs.order_by(group_by_time)
x_interval = x_interval_qs.aggregate(
min=Trunc(Min("timestamp"), kind=group_by_time, output_field=models.DateField()),
max=Trunc(Max("timestamp"), kind=group_by_time, output_field=models.DateField()),
)
if x_interval["min"] is None or x_interval["max"] is None:
x_interval = None
return (qs.order_by(group_by_time), x_interval)
@classmethod
def get_global_statistics(cls, group_by_time, y_label, start=None, end=None):
@@ -447,6 +456,9 @@ class Event(models.Model):
verbose_name = _('event')
verbose_name_plural = _('events')
ordering = ('timestamp', 'id')
index_together = [
("type", "timestamp"),
]
class EventCursor(str):

View File

@@ -48,9 +48,9 @@ class Statistics:
}
default_y_label = _('None')
def __init__(self, qs, time_interval):
def __init__(self, qs, time_interval, x_interval=None):
self.time_interval = time_interval
self.x_labels = self.build_x_labels(qs)
self.x_labels = self.build_x_labels(qs, x_interval)
self._x_labels_indexes = {label: i for i, label in enumerate(self.x_labels)}
self.series = {}
self.y_labels = []
@@ -58,15 +58,18 @@ class Statistics:
def set_y_labels(self, y_labels):
self.y_labels[:] = y_labels
def build_x_labels(self, qs):
def build_x_labels(self, qs, x_interval=None):
if self.time_interval == 'timestamp':
return list(qs.distinct().values_list(self.time_interval, flat=True))
aggregate = qs.aggregate(min=Min(self.time_interval), max=Max(self.time_interval))
if not aggregate['min']:
return []
if x_interval is not None:
aggregate = x_interval
else:
aggregate = qs.aggregate(min=Min(self.time_interval), max=Max(self.time_interval))
if not aggregate['min']:
return []
min_date, max_date = aggregate['min'].date(), aggregate['max'].date()
min_date, max_date = aggregate['min'], aggregate['max']
if self.time_interval == 'day':
return [min_date + timedelta(days=i) for i in range((max_date - min_date).days + 1)]
if self.time_interval == 'year':

View File

@@ -68,7 +68,7 @@ class EventTypeWithHow(EventTypeWithService):
elif service:
which_references = service
qs = cls.get_statistics(
qs, x_interval = cls.get_statistics(
group_by_time=group_by_time,
group_by_field='how',
which_references=which_references,
@@ -76,7 +76,7 @@ class EventTypeWithHow(EventTypeWithService):
start=start,
end=end,
)
stats = Statistics(qs, time_interval=group_by_time)
stats = Statistics(qs, time_interval=group_by_time, x_interval=x_interval)
for stat in qs:
stats.add(x_label=stat[group_by_time], y_label=stat['how'], value=stat['count'])
@@ -84,8 +84,8 @@ class EventTypeWithHow(EventTypeWithService):
@classmethod
def _get_method_statistics_by_service_or_ou(cls, group_by_time, reference, **kwargs):
qs = cls.get_statistics(group_by_time, group_by_field='service_name', **kwargs)
stats = Statistics(qs, time_interval=group_by_time)
qs, x_interval = cls.get_statistics(group_by_time, group_by_field='service_name', **kwargs)
stats = Statistics(qs, x_interval=x_interval, time_interval=group_by_time)
if reference == 'service':
services = Service.objects.all()