journal: add event type statistics (#47467)

This commit is contained in:
Valentin Deniaud 2020-11-18 17:23:21 +01:00
parent 5371f986b6
commit c1345a3356
4 changed files with 335 additions and 20 deletions

View File

@ -23,10 +23,12 @@ import re
from django.conf import settings
from django.contrib.auth import get_user_model
from django.contrib.postgres.fields import ArrayField, JSONField
from django.contrib.postgres.fields.jsonb import KeyTextTransform
from django.contrib.contenttypes.models import ContentType
from django.core.exceptions import ObjectDoesNotExist
from django.db import models
from django.db.models import QuerySet, Q, F, Value
from django.db.models import QuerySet, Q, F, Value, Count
from django.db.models.functions import Trunc
from django.utils.translation import ugettext_lazy as _
from django.utils.timezone import utc, now
@ -108,6 +110,48 @@ class EventTypeDefinition(metaclass=EventTypeDefinitionMeta):
def get_message(self, event, context=None):
    """Return the message to display for ``event``.

    This default implementation ignores both ``event`` and ``context`` and
    simply gives back the label of the event type.
    """
    message = self.label
    return message
@classmethod
def get_statistics(
    cls,
    group_by_time,
    group_by_field=None,
    group_by_references=False,
    which_references=None,
    start=None,
    end=None,
):
    """Count events of this type, grouped by time and optional extra keys.

    Parameters:
        group_by_time: one of ``'timestamp'``, ``'day'``, ``'month'`` or
            ``'year'``; the time granularity used for grouping.
        group_by_field: optional key of the event ``data`` JSON field to
            group by as well.
        group_by_references: when true, ``reference_ids`` is added to the
            grouping keys.
        which_references: when truthy, passed to the queryset's
            ``which_references()`` to restrict the events.
        start, end: optional inclusive bounds on the event ``timestamp``.

    Returns:
        A ``values()`` queryset whose rows hold the grouping keys and a
        ``count`` entry, ordered by ``group_by_time``.

    Raises:
        ValueError: if ``group_by_time`` is not a supported value.
    """
    if group_by_time not in ('timestamp', 'day', 'month', 'year'):
        # The value is wrapped in a tuple so that a tuple argument cannot
        # break the %-formatting of the error message itself.
        raise ValueError('Unsupported value for group_by_time: %s' % (group_by_time,))

    event_type = EventType.objects.get_for_name(cls.name)
    qs = Event.objects.filter(type=event_type)
    if start:
        qs = qs.filter(timestamp__gte=start)
    if end:
        qs = qs.filter(timestamp__lte=end)

    values = [group_by_time]
    if group_by_time != 'timestamp':
        # Annotate each event with its timestamp truncated to the requested
        # granularity, under a name equal to that granularity.
        qs = qs.annotate(
            **{group_by_time: Trunc('timestamp', kind=group_by_time, output_field=models.DateField())}
        )

    if group_by_field:
        # get field from JSONField
        qs = qs.annotate(**{group_by_field: KeyTextTransform(group_by_field, 'data')})
        values.append(group_by_field)

    # NOTE(review): this is a truthiness test, so an empty list or empty
    # queryset applies no restriction at all - confirm this is intended.
    if which_references:
        qs = qs.which_references(which_references)

    if group_by_references:
        values.append('reference_ids')

    # values() followed by annotate() groups the rows by the selected keys.
    qs = qs.values(*values)
    qs = qs.annotate(count=Count('id'))
    return qs.order_by(group_by_time)
def __repr__(self):
    """Return a debugging representation showing the type's name and label."""
    name, label = self.name, self.label
    return '<EventTypeDefinition %r %s>' % (name, label)

View File

@ -14,6 +14,10 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from datetime import timedelta, date
from django.db.models import Min, Max, DateField
def _json_value(value):
if isinstance(value, (dict, list, str, int, bool)) or value is None:
@ -30,3 +34,62 @@ def form_to_old_new(form):
old[key] = _json_value(old_value)
new[key] = _json_value(form.cleaned_data.get(key))
return {'old': old, 'new': new}
class Statistics:
    """Accumulate counts into labelled series aligned on a shared time axis.

    The x axis is computed once from the queryset given to the constructor;
    each series is a list with one slot per x label, ``None`` meaning that
    nothing was added for that slot.
    """

    # strftime patterns used to render x labels, keyed by time interval.
    time_label_formats = {
        'year': '%Y',
        'month': '%Y-%m',
        'day': '%Y-%m-%d',
    }

    def __init__(self, qs, time_interval):
        """Build the x axis for ``qs`` at the ``time_interval`` granularity."""
        self.time_interval = time_interval
        self.x_labels = self.build_x_labels(qs)
        positions = {}
        for position, label in enumerate(self.x_labels):
            positions[label] = position
        self._x_labels_indexes = positions
        self.series = {}

    def build_x_labels(self, qs):
        """Return the ordered list of x labels covering the data in ``qs``.

        For ``'timestamp'`` the distinct values found in ``qs`` are used as
        is. For ``'day'``, ``'month'`` and ``'year'`` every period between the
        smallest and the largest value is listed, so periods without events
        still get a slot. An empty list is returned when ``qs`` has no value.
        """
        interval = self.time_interval
        if interval == 'timestamp':
            return list(qs.distinct().values_list(interval, flat=True))

        bounds = qs.aggregate(min=Min(interval), max=Max(interval))
        if not bounds['min']:
            return []
        first, last = bounds['min'].date(), bounds['max'].date()

        if interval == 'day':
            span = (last - first).days
            return [first + timedelta(days=offset) for offset in range(span + 1)]
        if interval == 'year':
            return [date(year=year, month=1, day=1) for year in range(first.year, last.year + 1)]
        if interval == 'month':
            # Walk month by month from the first to the last month, inclusive.
            month_count = (last.year - first.year) * 12 + (last.month - first.month) + 1
            labels = []
            for offset in range(month_count):
                extra_years, month_index = divmod(first.month - 1 + offset, 12)
                labels.append(date(year=first.year + extra_years, month=month_index + 1, day=1))
            return labels
        return None

    def add(self, x_label, y_label, value):
        """Add ``value`` to the slot ``x_label`` of the series ``y_label``."""
        points = self.get_serie(y_label)
        position = self.x_index(x_label)
        previous = points[position] or 0
        points[position] = previous + value

    def get_serie(self, label):
        """Return the series named ``label``, creating it empty if needed."""
        if label not in self.series:
            self.series[label] = [None] * len(self.x_labels)
        return self.series[label]

    def x_index(self, x_label):
        """Return the position of ``x_label`` on the x axis."""
        return self._x_labels_indexes[x_label]

    def to_json(self, get_y_label=lambda x: x):
        """Return a dict with the formatted ``x_labels`` and the ``series``.

        ``get_y_label`` is applied to each series key to compute its label.
        """
        series = []
        for key, points in self.series.items():
            series.append({'label': get_y_label(key), 'data': points})
        formatted_labels = [self.format_x_label(label) for label in self.x_labels]
        return {'x_labels': formatted_labels, 'series': series}

    def format_x_label(self, label):
        """Render one x label as a string suited to the time interval."""
        if self.time_interval != 'timestamp':
            pattern = self.time_label_formats[self.time_interval]
            return label.strftime(pattern)
        return label.isoformat()

View File

@ -14,14 +14,15 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from django.contrib.contenttypes.models import ContentType
from django.utils.translation import ugettext_lazy as _
from authentic2.custom_user.models import get_attributes_map
from authentic2.apps.journal.models import EventTypeDefinition
from authentic2.apps.journal.utils import form_to_old_new
from authentic2.apps.journal.models import EventTypeDefinition, n_2_pairing_rev
from authentic2.apps.journal.utils import form_to_old_new, Statistics
from authentic2.custom_user.models import User
from . import models
from .models import Service
class EventTypeWithService(EventTypeDefinition):
@ -38,7 +39,7 @@ class EventTypeWithService(EventTypeDefinition):
@classmethod
def get_service_name(self, event):
(service,) = event.get_typed_references(models.Service)
(service,) = event.get_typed_references(Service)
if service is not None:
return str(service)
if 'service_name' in event.data:
@ -46,6 +47,60 @@ class EventTypeWithService(EventTypeDefinition):
return ''
class EventTypeWithHow(EventTypeWithService):
    """Service event type that stores a ``how`` value in the event data.

    Besides recording, it offers statistics of the events grouped by time
    and by ``how`` value, by service, or by the service's ``ou``.
    """

    @classmethod
    def record(cls, user, session, service, how):
        """Record the event, storing ``how`` under the ``'how'`` data key."""
        super().record(user=user, session=session, service=service, data={'how': how})

    @classmethod
    def get_method_statistics(cls, group_by_time, service=None, ou=None, start=None, end=None):
        """Return JSON-ready statistics with one series per ``how`` value.

        When ``ou`` is given it takes precedence over ``service``: the events
        are restricted with the queryset of services filtered on that ``ou``.
        ``start`` and ``end`` are forwarded to ``get_statistics``.
        """
        if ou:
            service = Service.objects.filter(ou=ou)

        qs = cls.get_statistics(
            group_by_time=group_by_time, group_by_field='how', which_references=service, start=start, end=end
        )
        stats = Statistics(qs, time_interval=group_by_time)

        for stat in qs:
            stats.add(x_label=stat[group_by_time], y_label=stat['how'], value=stat['count'])

        # Events recorded without a 'how' value are labelled from the empty string.
        return stats.to_json(get_y_label=lambda x: _(login_method_label(x or '')))

    @classmethod
    def _get_method_statistics_by_reference(cls, group_by_time, reference, **kwargs):
        """Return JSON-ready statistics with one series per service or ou.

        ``reference`` selects the series label: ``'service'`` uses the string
        form of the referenced service, ``'ou'`` the string form of its
        ``ou``. Events referencing no service are counted under ``_('None')``.
        Extra keyword arguments are forwarded to ``get_statistics``.

        Raises:
            ValueError: if ``reference`` is neither ``'service'`` nor ``'ou'``.
        """
        # Fail early and explicitly: otherwise the labels mapping would be
        # left unbound and only fail later with an UnboundLocalError.
        if reference not in ('service', 'ou'):
            raise ValueError('Unsupported value for reference: %r' % (reference,))

        qs = cls.get_statistics(group_by_time, group_by_references=True, **kwargs)
        stats = Statistics(qs, time_interval=group_by_time)

        if reference == 'service':
            reference_labels = {service.pk: str(service) for service in Service.objects.all()}
        else:
            reference_labels = {
                service.pk: str(service.ou) for service in Service.objects.all().select_related('ou')
            }

        service_ct_id = ContentType.objects.get_for_model(Service).pk
        for stat in qs:
            # The first reference that points to a Service decides the label.
            for reference_id in stat['reference_ids'] or []:
                content_type_id, instance_pk = n_2_pairing_rev(reference_id)
                if content_type_id == service_ct_id:
                    # NOTE(review): raises KeyError if the referenced service
                    # no longer exists - confirm whether that can happen.
                    reference_label = reference_labels[instance_pk]
                    break
            else:
                reference_label = _('None')
            stats.add(x_label=stat[group_by_time], y_label=reference_label, value=stat['count'])

        return stats.to_json()

    @classmethod
    def get_service_statistics(cls, group_by_time, start=None, end=None):
        """Return statistics with one series per referenced service."""
        return cls._get_method_statistics_by_reference(group_by_time, 'service', start=start, end=end)

    @classmethod
    def get_service_ou_statistics(cls, group_by_time, start=None, end=None):
        """Return statistics with one series per ``ou`` of the referenced service."""
        return cls._get_method_statistics_by_reference(group_by_time, 'ou', start=start, end=end)
def login_method_label(how):
if how.startswith('password'):
return _('password')
@ -73,14 +128,10 @@ def get_attributes_label(attributes_new_values):
yield name
class UserLogin(EventTypeWithService):
class UserLogin(EventTypeWithHow):
name = 'user.login'
label = _('login')
@classmethod
def record(cls, user, session, service, how):
super().record(user=user, session=session, service=service, data={'how': how})
@classmethod
def get_message(cls, event, context):
how = event.get_data('how')
@ -115,14 +166,10 @@ class UserRegistrationRequest(EventTypeDefinition):
return _('registration request with email "%s"') % email
class UserRegistration(EventTypeWithService):
class UserRegistration(EventTypeWithHow):
name = 'user.registration'
label = _('registration')
@classmethod
def record(cls, user, session, service, how):
super().record(user=user, session=session, service=service, data={'how': how})
@classmethod
def get_message(cls, event, context):
how = event.get_data('how')
@ -219,14 +266,10 @@ class UserDeletion(EventTypeWithService):
super().record(user=user, session=session, service=service)
class UserServiceSSO(EventTypeWithService):
class UserServiceSSO(EventTypeWithHow):
name = 'user.service.sso'
label = _('service single sign on')
@classmethod
def record(cls, user, session, service, how):
super().record(user=user, session=session, service=service, data={'how': how})
@classmethod
def get_message(cls, event, context):
service_name = cls.get_service_name(event)

View File

@ -19,11 +19,14 @@ import random
import mock
import pytest
import pytz
from django.contrib.auth import get_user_model
from django.core.management import call_command
from django.utils.timezone import make_aware, make_naive
from authentic2.a2_rbac.models import OrganizationalUnit as OU
from authentic2.a2_rbac.utils import get_default_ou
from authentic2.apps.journal.forms import JournalForm
from authentic2.apps.journal.journal import Journal
from authentic2.apps.journal.models import EventTypeDefinition, EventType, Event, clean_registry
@ -443,3 +446,165 @@ def test_message_in_context_exception_handling(db, some_event_types, caplog):
assert len(caplog.records) == 1
assert caplog.records[0].levelname == 'ERROR'
assert caplog.records[0].message == 'could not render message of event type "user.login"'
@pytest.mark.parametrize('event_type_name', ['user.login', 'user.registration'])
def test_statistics(db, event_type_name, freezer):
    """Check method, service and OU statistics over a small set of events."""
    john = User.objects.create(username='john.doe', email='john.doe@example.com')
    jane = User.objects.create(username='jane.doe', email='jane.doe@example.com')
    second_ou = OU.objects.create(name='Second OU')

    portal = Service.objects.create(name='portal', slug='portal', ou=second_ou)
    agendas = Service.objects.create(name='agendas', slug='agendas', ou=get_default_ou())
    forms = Service.objects.create(name='forms', slug='forms', ou=get_default_ou())

    by_password = {'how': 'password-on-https'}
    by_fc = {'how': 'fc'}

    event_type = EventType.objects.get_for_name(event_type_name)
    definition = event_type.definition

    def log(who, service, data):
        # Record one event of the tested type for this user and service.
        Event.objects.create(type=event_type, references=[who, service], user=who, data=data)

    def by_label(result):
        # Series order is not guaranteed: sort by label before comparing.
        result['series'].sort(key=lambda serie: serie['label'])
        return result

    # Without any event, both axes are empty whatever the granularity.
    for interval in ('timestamp', 'month'):
        assert definition.get_method_statistics(interval) == {'series': [], 'x_labels': []}

    freezer.move_to('2020-02-03 12:00')
    log(john, portal, by_password)
    log(jane, portal, by_password)

    freezer.move_to('2020-02-03 13:00')
    log(john, portal, by_fc)
    log(jane, portal, by_fc)

    freezer.move_to('2020-03-03 12:00')
    log(john, portal, by_password)
    log(john, agendas, by_password)
    log(john, forms, by_password)
    # One event without service reference and without data.
    Event.objects.create(type=event_type, user=john)

    assert by_label(definition.get_method_statistics('timestamp')) == {
        'x_labels': ['2020-02-03T12:00:00+00:00', '2020-02-03T13:00:00+00:00', '2020-03-03T12:00:00+00:00'],
        'series': [
            {'label': 'FranceConnect', 'data': [None, 2, None]},
            {'label': 'none', 'data': [None, None, 1]},
            {'label': 'password', 'data': [2, None, 3]},
        ],
    }

    # Restricting the time window keeps only the 13:00 events.
    window_start = datetime(2020, 2, 3, 12, 30, tzinfo=pytz.UTC)
    window_end = datetime(2020, 2, 3, 13, 30, tzinfo=pytz.UTC)
    windowed = definition.get_method_statistics('timestamp', start=window_start, end=window_end)
    assert windowed == {
        'x_labels': ['2020-02-03T13:00:00+00:00'],
        'series': [{'label': 'FranceConnect', 'data': [2]}],
    }

    assert by_label(definition.get_method_statistics('month')) == {
        'x_labels': ['2020-02', '2020-03'],
        'series': [
            {'label': 'FranceConnect', 'data': [2, None]},
            {'label': 'none', 'data': [None, 1]},
            {'label': 'password', 'data': [2, 3]},
        ],
    }

    assert definition.get_method_statistics('month', ou=get_default_ou()) == {
        'x_labels': ['2020-03'],
        'series': [{'label': 'password', 'data': [2]}],
    }

    # Filtering on the second OU or on its only service gives the same result.
    portal_only = {
        'x_labels': ['2020-02', '2020-03'],
        'series': [{'label': 'FranceConnect', 'data': [2, None]}, {'label': 'password', 'data': [2, 1]}],
    }
    assert by_label(definition.get_method_statistics('month', ou=second_ou)) == portal_only
    assert by_label(definition.get_method_statistics('month', service=portal)) == portal_only

    assert by_label(definition.get_method_statistics('year')) == {
        'x_labels': ['2020'],
        'series': [
            {'label': 'FranceConnect', 'data': [2]},
            {'label': 'none', 'data': [1]},
            {'label': 'password', 'data': [5]},
        ],
    }

    assert by_label(definition.get_service_statistics('month')) == {
        'x_labels': ['2020-02', '2020-03'],
        'series': [
            {'label': 'None', 'data': [None, 1]},
            {'label': 'agendas', 'data': [None, 1]},
            {'label': 'forms', 'data': [None, 1]},
            {'label': 'portal', 'data': [4, 1]},
        ],
    }

    assert by_label(definition.get_service_ou_statistics('month')) == {
        'x_labels': ['2020-02', '2020-03'],
        'series': [
            {'label': 'Default organizational unit', 'data': [None, 2]},
            {'label': 'None', 'data': [None, 1]},
            {'label': 'Second OU', 'data': [4, 1]},
        ],
    }
def test_statistics_fill_date_gaps(db, freezer):
    """Check that periods without events still appear on the x axis."""
    User.objects.create(username='john.doe', email='john.doe@example.com')
    by_password = {'how': 'password-on-https'}
    event_type = EventType.objects.get_for_name('user.login')
    definition = event_type.definition

    def log_at(moment):
        # Record one login event at the given frozen time.
        freezer.move_to(moment)
        Event.objects.create(type=event_type, data=by_password)

    # Days: the gap spans a year boundary.
    log_at('2020-12-29 12:00')
    log_at('2021-01-02 13:00')
    assert definition.get_method_statistics('day') == {
        'x_labels': ['2020-12-29', '2020-12-30', '2020-12-31', '2021-01-01', '2021-01-02'],
        'series': [{'label': 'password', 'data': [1, None, None, None, 1]}],
    }

    # Months: a full empty year lies between the two events.
    Event.objects.all().delete()
    log_at('2020-11-29 12:00')
    log_at('2022-02-02 13:00')
    expected_months = ['2020-11', '2020-12']
    expected_months += ['2021-%02d' % i for i in range(1, 13)]
    expected_months += ['2022-01', '2022-02']
    assert definition.get_method_statistics('month') == {
        'x_labels': expected_months,
        'series': [{'label': 'password', 'data': [1] + [None] * 14 + [1]}],
    }

    # Years: four empty years between the two events.
    Event.objects.all().delete()
    log_at('2020-11-29 12:00')
    log_at('2025-02-02 13:00')
    assert definition.get_method_statistics('year') == {
        'x_labels': ['2020', '2021', '2022', '2023', '2024', '2025'],
        'series': [{'label': 'password', 'data': [1, None, None, None, None, 1]}],
    }