authentic/src/authentic2/apps/journal/models.py

431 lines
14 KiB
Python

# authentic2 - versatile identity manager
# Copyright (C) 2010-2020 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from collections import defaultdict
from contextlib import contextmanager
from datetime import datetime, timedelta
import logging
import re
from django.conf import settings
from django.contrib.auth import get_user_model
from django.contrib.postgres.fields import ArrayField, JSONField
from django.contrib.contenttypes.models import ContentType
from django.core.exceptions import ObjectDoesNotExist
from django.db import models
from django.db.models import QuerySet, Q, F, Value
from django.utils.translation import ugettext_lazy as _
from django.utils.timezone import utc, now
from authentic2.decorators import GlobalCache
from . import sql
logger = logging.getLogger(__name__)
User = get_user_model()
_registry = {}
@contextmanager
def clean_registry():
global _registry
old_registry = _registry
_registry = {}
yield
_registry = old_registry
class EventTypeDefinitionMeta(type):
def __new__(cls, name, bases, namespace, **kwargs):
global _registry
new_cls = type.__new__(cls, name, bases, namespace, **kwargs)
name = namespace.get('name')
if name:
assert (
new_cls.retention_days is None or new_cls.retention_days >= 0
), 'retention_days must be None or >= 0'
assert new_cls.name, 'name is missing'
assert re.match(r'^[a-z_]+(?:\.[a-z_]+)*$', new_cls.name), (
'%r is not proper event type name' % new_cls.name
)
assert new_cls.label, 'label is missing'
assert new_cls.name not in _registry, 'name %r is already registered' % new_cls.name
_registry[new_cls.name] = new_cls
return new_cls
class EventTypeDefinition(metaclass=EventTypeDefinitionMeta):
name = ''
label = None
# used to group type of events
# how long to keep this type of events
retention_days = None
@classmethod
def record(cls, user=None, session=None, references=None, data=None):
event_type = EventType.objects.get_for_name(cls.name)
Event.objects.create(
type=event_type,
user=user,
session_id=session and session.session_key,
references=references or None, # NULL values take less space
data=data or None, # NULL values take less space
)
@classmethod
def get_for_name(cls, name):
return _registry.get(name)
@classmethod
def search_by_name(self, name):
for evd_name, evd in _registry.items():
if evd_name == name or evd_name.rsplit('.', 1)[-1] == name:
yield evd
@classmethod
def get_message(self, event, context=None):
return self.label
def __repr__(self):
return '<EventTypeDefinition %r %s>' % (self.name, self.label)
@GlobalCache
def event_type_cache(name):
event_type, created = EventType.objects.get_or_create(name=name)
return event_type
class EventTypeManager(models.Manager):
def get_for_name(self, name):
return event_type_cache(name)
class EventType(models.Model):
name = models.SlugField(verbose_name=_('name'), max_length=256, unique=True)
@property
def definition(self):
return EventTypeDefinition.get_for_name(self.name)
def __str__(self):
definition = self.definition
if definition:
return str(definition.label)
else:
return '%s (definition not found)' % self.name
objects = EventTypeManager()
class Meta:
verbose_name = _('event type')
verbose_name_plural = _('event types')
ordering = ('name',)
class EventQuerySet(QuerySet):
@classmethod
def _which_references_query(cls, instance_or_model_class_or_queryset):
if isinstance(instance_or_model_class_or_queryset, type) and issubclass(
instance_or_model_class_or_queryset, models.Model
):
ct = ContentType.objects.get_for_model(instance_or_model_class_or_queryset)
q = Q(reference_ct_ids__contains=[ct.pk])
# users can also be references by the user_id column
if instance_or_model_class_or_queryset is User:
q |= Q(user__isnull=False)
return q
elif isinstance(instance_or_model_class_or_queryset, QuerySet):
qs = instance_or_model_class_or_queryset
model = qs.model
ct_model = ContentType.objects.get_for_model(model)
qs_array = qs.values_list(Value(ct_model.id << 32) + F('pk'), flat=True)
q = Q(reference_ids__overlap=sql.ArraySubquery(qs_array))
if issubclass(model, User):
q = q | Q(user__in=qs)
else:
instance = instance_or_model_class_or_queryset
q = Q(reference_ids__contains=[reference_integer(instance)])
if isinstance(instance, User):
q = q | Q(user=instance)
return q
def which_references(self, instance_or_queryset):
return self.filter(self._which_references_query(instance_or_queryset))
def from_cursor(self, cursor):
return self.filter(
Q(timestamp=cursor.timestamp, id__gte=cursor.event_id) | Q(timestamp__gt=cursor.timestamp)
)
def to_cursor(self, cursor):
return self.filter(
Q(timestamp=cursor.timestamp, id__lte=cursor.event_id) | Q(timestamp__lt=cursor.timestamp)
)
def __getitem__(self, i):
# slice by cursor:
# [cursor..20] or [-20..cursor]
# it simplifies pagination
if isinstance(i, slice) and i.step is None:
_slice = i
if isinstance(_slice.start, EventCursor) and isinstance(_slice.stop, int) and _slice.stop >= 0:
return self.from_cursor(_slice.start)[: _slice.stop]
if isinstance(_slice.start, int) and _slice.start <= 0 and isinstance(_slice.stop, EventCursor):
qs = self.order_by('-timestamp', '-id').to_cursor(_slice.stop)[: -_slice.start]
return list(reversed(qs))
return super().__getitem__(i)
def prefetch_references(self):
prefetch_events_references(self)
return self
class EventManager(models.Manager):
def get_queryset(self):
return super().get_queryset().select_related('type', 'user')
# contains/overlap operator on postresql ARRAY do not really support nested arrays,
# so we cannot use them to query an array generic foreign keys repsented as two
# elements integers arrays like '{{1,100},{2,300}}' as any integer will match.
# ex.:
# authentic_multitenant=# select '{{1,2}}'::int[] && '{{1,3}}'::int[];
# ?column?
# ----------
# t
# (1 line)
#
# To work around this limitation we map pair of integers (content_type.pk,
# instance.pk) to a corresponding unique 64-bit integer using the reversible
# mapping (content_type.pk << 32 + instance.pk).
def n_2_pairing(a, b):
return a * 2 ** 32 + b
def n_2_pairing_rev(n):
return (n >> 32, n & (2 ** 32 - 1))
def reference_integer(instance):
return n_2_pairing(ContentType.objects.get_for_model(instance).pk, instance.pk)
class Event(models.Model):
timestamp = models.DateTimeField(verbose_name=_('timestamp'), default=now, editable=False, blank=True)
user = models.ForeignKey(
verbose_name=_('user'),
to=User,
on_delete=models.DO_NOTHING,
db_constraint=False,
blank=True,
null=True,
)
session = models.ForeignKey(
verbose_name=_('session'),
to='sessions.Session',
on_delete=models.DO_NOTHING,
db_constraint=False,
blank=True,
null=True,
)
type = models.ForeignKey(verbose_name=_('type'), to=EventType, on_delete=models.PROTECT)
reference_ids = ArrayField(
verbose_name=_('reference ids'), base_field=models.BigIntegerField(), null=True,
)
reference_ct_ids = ArrayField(
verbose_name=_('reference ct ids'), base_field=models.IntegerField(), null=True,
)
data = JSONField(verbose_name=_('data'), null=True)
objects = EventManager.from_queryset(EventQuerySet)()
def __init__(self, *args, **kwargs):
references = kwargs.pop('references', ())
super().__init__(*args, **kwargs)
for reference in references or ():
self.add_reference_to_instance(reference)
def add_reference_to_instance(self, instance):
self.reference_ids = self.reference_ids or []
self.reference_ct_ids = self.reference_ct_ids or []
if instance is not None:
self.reference_ids.append(reference_integer(instance))
self.reference_ct_ids.append(ContentType.objects.get_for_model(instance).pk)
else:
self.reference_ids.append(0)
self.reference_ct_ids.append(0)
def get_reference_ids(self):
return map(n_2_pairing_rev, self.reference_ids or ())
@property
def references(self):
if not hasattr(self, '_references_cache'):
self._references_cache = []
for content_type_id, instance_pk in self.get_reference_ids():
if content_type_id != 0:
content_type = ContentType.objects.get_for_id(content_type_id)
try:
self._references_cache.append(content_type.get_object_for_this_type(pk=instance_pk))
continue
except ObjectDoesNotExist:
pass
self._references_cache.append(None)
return self._references_cache
@property
def cursor(self):
return EventCursor.from_event(self)
def __repr__(self):
return '<Event id:%s %s %s>' % (self.id, self.timestamp, self.type.name)
@classmethod
def cleanup(cls):
'''Expire old events by default retention days or customized at the
EventTypeDefinition level.'''
event_types_by_retention_days = defaultdict(set)
default_retention_days = getattr(settings, 'JOURNAL_DEFAULT_RETENTION_DAYS', 365 * 2)
for event_type in EventType.objects.all():
evd = event_type.definition
retention_days = evd.retention_days if evd else None
if retention_days == 0:
# do not expire
continue
if retention_days is None:
retention_days = default_retention_days
event_types_by_retention_days[retention_days].add(event_type)
for retention_days, event_types in event_types_by_retention_days.items():
threshold = now() - timedelta(days=retention_days)
Event.objects.filter(type__in=event_types).filter(timestamp__lt=threshold).delete()
@property
def session_id_shortened(self):
return (self.session_id and self.session_id[:6]) or '-'
@property
def message(self):
return self.message_in_context(None)
def message_in_context(self, context):
if self.type.definition:
try:
return self.type.definition.get_message(self, context)
except Exception:
logger.exception('could not render message of event type "%s"', self.type.name)
return self.type.name
def get_data(self, key, default=None):
return (self.data or {}).get(key, default)
def get_typed_references(self, *reference_types):
count = 0
for reference_type, reference in zip(reference_types, self.references):
if reference_type is None:
yield None
else:
if isinstance(reference, reference_type):
yield reference
else:
yield None
count += 1
for i in range(len(reference_types) - count):
yield None
class Meta:
verbose_name = _('event')
verbose_name_plural = _('events')
ordering = ('timestamp', 'id')
class EventCursor(str):
'''Represents a point in the journal'''
def __new__(cls, value):
self = super().__new__(cls, value)
try:
timestamp, event_id = value.split(' ', 1)
timestamp = float(timestamp)
event_id = int(event_id)
timestamp = datetime.fromtimestamp(timestamp, tz=utc)
except ValueError as e:
raise ValueError('invalid event cursor') from e
self.timestamp = timestamp
self.event_id = event_id
return self
@classmethod
def parse(cls, value):
try:
return cls(value)
except ValueError:
return None
@classmethod
def from_event(cls, event):
assert event.id is not None
assert event.timestamp is not None
cursor = super().__new__(cls, '%s %s' % (event.timestamp.timestamp(), event.id))
cursor.timestamp = event.timestamp
cursor.event_id = event.id
return cursor
def minus_one(self):
return EventCursor('%s %s' % (self.timestamp.timestamp(), self.event_id - 1))
def prefetch_events_references(events):
'''Prefetch references on an iterable of events, prevent N+1 queries problem.'''
grouped_references = defaultdict(set)
references = {}
# group reference ids
for event in events:
for content_type_id, instance_pk in event.get_reference_ids():
grouped_references[content_type_id].add(instance_pk)
# make batched queries for each CT
for content_type_id, instance_pks in grouped_references.items():
content_type = ContentType.objects.get_for_id(content_type_id)
for instance in content_type.get_all_objects_for_this_type(pk__in=instance_pks):
references[(content_type_id, instance.pk)] = instance
# assign references to events
for event in events:
event._references_cache = [
references.get((content_type_id, instance_pk))
for content_type_id, instance_pk in event.get_reference_ids()
]