combo/combo/apps/dataviz/models.py

772 lines
28 KiB
Python

# combo - content management system
# Copyright (C) 2014-2015 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import copy
import os
import re
from collections import OrderedDict
from datetime import date, datetime, timedelta
import pygal
import pygal.util
from dateutil.relativedelta import MO, relativedelta
from django.conf import settings
from django.contrib.postgres.fields import JSONField
from django.db import models, transaction
from django.template import RequestContext, Template, TemplateSyntaxError, VariableDoesNotExist
from django.template.defaultfilters import date as format_date
from django.urls import reverse
from django.utils import timezone
from django.utils.dates import WEEKDAYS
from django.utils.encoding import force_text
from django.utils.functional import cached_property
from django.utils.translation import gettext
from django.utils.translation import ugettext_lazy as _
from django.utils.translation import ungettext
from requests.exceptions import RequestException
from combo.data.library import register_cell_class
from combo.data.models import CellBase, django_template_validator
from combo.middleware import get_request
from combo.utils import get_templated_url, requests, spooler
class UnsupportedDataSet(Exception):
    """Raised when a statistics response cannot be mapped to a chart.

    ChartNgCell.parse_response raises it for three-dimensional data and for
    data whose rows do not match the announced labels.
    """
class MissingRequest(Exception):
    """Raised when a request context is needed but no request is attached.

    ChartNgCell.request_context raises it when the cell has no ``_request``.
    """
class MissingVariable(Exception):
    """Raised when a filter refers to a page variable that is not defined.

    ChartNgCell.evaluate_filter_value raises it when the variable name is
    absent from the page's extra variables.
    """
@register_cell_class
class Gauge(CellBase):
    """Cell rendering a gauge whose value is read from a data source URL."""

    title = models.CharField(_('Title'), max_length=150, blank=True, null=True)
    url = models.CharField(_('URL'), max_length=150, blank=True, null=True)
    data_source = models.CharField(_('Data Source'), max_length=150, blank=True, null=True)
    jsonp_data_source = models.BooleanField(_('Use JSONP to get data'), default=True)
    max_value = models.PositiveIntegerField(_('Max Value'), blank=True, null=True)

    default_template_name = 'combo/gauge-cell.html'

    class Media:
        js = ('js/gauge.min.js', 'js/combo.gauge.js')

    class Meta:
        verbose_name = _('Gauge')

    def get_additional_label(self):
        """Return the configured title (may be empty or None)."""
        return self.title

    def is_relevant(self, context):
        """Return True only when a data source has been configured."""
        return True if self.data_source else False

    def get_cell_extra_context(self, context):
        """Return the variables exposed to the gauge template.

        The incoming ``context`` is not used; a fresh dict is returned.
        """
        # In JSONP mode the templated data source URL is handed over as is,
        # otherwise the URL of the 'combo-ajax-gauge-count' view is used.
        data_source_url = (
            get_templated_url(self.data_source)
            if self.jsonp_data_source
            else reverse('combo-ajax-gauge-count', kwargs={'cell': self.id})
        )
        link_url = get_templated_url(self.url) if self.url else None
        return {
            'cell': self,
            'title': self.title,
            'url': link_url,
            'max_value': self.max_value,
            'data_source_url': data_source_url,
            'jsonp': self.jsonp_data_source,
        }
@register_cell_class
class ChartCell(CellBase):
    """Legacy chart cell exposing a title and a chart URL to its template."""

    default_template_name = 'combo/dataviz-chart.html'

    title = models.CharField(_('Title'), max_length=150, blank=True, null=True)
    url = models.URLField(_('URL'), max_length=250, blank=True, null=True)

    class Meta:
        verbose_name = _('Chart (legacy)')

    @classmethod
    def is_enabled(cls):
        """Return a truthy value when the legacy cell can be used.

        Requires the LEGACY_CHART_CELL_ENABLED setting and a 'bijoe' entry in
        the KNOWN_SERVICES setting.
        """
        legacy_enabled = settings.LEGACY_CHART_CELL_ENABLED
        if not legacy_enabled:
            return legacy_enabled
        if not hasattr(settings, 'KNOWN_SERVICES'):
            return False
        return settings.KNOWN_SERVICES.get('bijoe')

    def get_default_form_class(self):
        """Return the form class used to edit this cell."""
        # imported here rather than at module level
        from .forms import ChartForm as form_class

        return form_class

    def get_additional_label(self):
        """Return the title, or an empty string when there is none."""
        return self.title or ''

    def get_cell_extra_context(self, context):
        """Extend the base cell context with the title and URL."""
        extra_context = super().get_cell_extra_context(context)
        for key in ('title', 'url'):
            extra_context[key] = getattr(self, key)
        return extra_context
class StatisticManager(models.Manager):
    """Manager resolving statistics from their natural key."""

    def get_by_natural_key(self, slug, site_slug, service_slug):
        """Return the statistic matching the natural key, creating it if needed."""
        statistic, _created = self.get_or_create(
            slug=slug,
            site_slug=site_slug,
            service_slug=service_slug,
        )
        return statistic
class Statistic(models.Model):
    """A statistic exposed by a remote service, identified by its natural key.

    The natural key is (slug, site_slug, service_slug), which is also
    declared unique.
    """

    slug = models.SlugField(_('Slug'), max_length=256)
    label = models.CharField(_('Label'), max_length=256)
    site_slug = models.SlugField(_('Site slug'), max_length=256)
    service_slug = models.SlugField(_('Service slug'), max_length=256)
    site_title = models.CharField(_('Site title'), max_length=256)
    url = models.URLField(_('Data URL'))
    # list of filter descriptions; each one is read as a dict with an 'id'
    # and, at least for 'time_interval', a list of 'options' having an 'id'
    filters = JSONField(default=list)
    has_future_data = models.BooleanField(default=False)
    available = models.BooleanField(_('Available data'), default=True)
    last_update = models.DateTimeField(_('Last update'), null=True, auto_now=True)

    objects = StatisticManager()

    class Meta:
        ordering = ['-available', 'site_title', 'label']
        unique_together = ['slug', 'site_slug', 'service_slug']

    def __str__(self):
        """Return "<site>: <label>", flagged when the statistic is unavailable."""
        name = _('%(title)s: %(label)s') % {'title': self.site_title or self.site_slug, 'label': self.label}
        if not self.available:
            name = _('%s (unavailable)') % name
        return name

    def natural_key(self):
        """Return the (slug, site_slug, service_slug) tuple."""
        return (self.slug, self.site_slug, self.service_slug)

    def has_native_support_for_interval(self, time_interval):
        """Tell whether the 'time_interval' filter offers ``time_interval``.

        Only the options of the filter whose id is 'time_interval' are
        inspected: the id is checked before the options are read, so filters
        without an 'options' entry are skipped instead of raising KeyError.
        """
        # pylint: disable=not-an-iterable
        return any(
            time_interval == option['id']
            for filter_ in self.filters
            if filter_['id'] == 'time_interval'
            for option in filter_['options']
        )
# Choices of the ChartNgCell.time_range field, as (stored value, label) pairs.
# The stored values are the ones tested in ChartNgCell.get_filter_params to
# compute the 'start' and 'end' parameters.
TIME_FILTERS = [
    ('previous-year', _('Previous year')),
    ('current-year', _('Current year')),
    ('next-year', _('Next year')),
    ('previous-month', _('Previous month')),
    ('current-month', _('Current month')),
    ('next-month', _('Next month')),
    ('previous-week', _('Previous week')),
    ('current-week', _('Current week')),
    ('next-week', _('Next week')),
    ('range', _('Free range (date)')),
    ('range-template', _('Free range (template)')),
]
@register_cell_class
class ChartNgCell(CellBase):
    """Cell rendering a Statistic as a pygal chart (or table)."""

    # Statistic to display; the cell is kept (with a null reference) when the
    # statistic is deleted.
    statistic = models.ForeignKey(
        verbose_name=_('Data'),
        to=Statistic,
        blank=False,
        null=True,
        on_delete=models.SET_NULL,
        related_name='cells',
        help_text=_(
            'This list may take a few seconds to be updated, please refresh the page if an item is missing.'
        ),
    )
    # extra filters reported by the statistic data, see update_subfilters()
    subfilters = JSONField(default=list)
    # filter id -> selected value; a value starting with 'variable:' refers to
    # a page variable, see evaluate_filter_value()
    filter_params = JSONField(default=dict)
    title = models.CharField(_('Title'), max_length=150, blank=True)
    # one of the TIME_FILTERS identifiers, interpreted by get_filter_params()
    time_range = models.CharField(
        _('Shown period'),
        max_length=20,
        blank=True,
        choices=TIME_FILTERS,
    )
    # bounds used when time_range is 'range'
    time_range_start = models.DateField(_('From'), null=True, blank=True)
    time_range_end = models.DateField(_('To'), null=True, blank=True)
    # bounds used when time_range is 'range-template'
    time_range_start_template = models.CharField(
        _('From'),
        max_length=200,
        blank=True,
        validators=[django_template_validator],
        help_text=_(
            'Template code returning a date. For example, Monday in two weeks would be '
            'today|add_days:"14"|adjust_to_week_monday. Page variables are also accessible.'
        ),
    )
    time_range_end_template = models.CharField(
        _('To'),
        max_length=200,
        blank=True,
        validators=[django_template_validator],
    )
    # the values are the keys of the pygal class table in get_chart()
    chart_type = models.CharField(
        _('Chart Type'),
        max_length=20,
        default='bar',
        choices=(
            ('bar', _('Bar')),
            ('horizontal-bar', _('Horizontal Bar')),
            ('stacked-bar', _('Stacked Bar')),
            ('stacked-bar-percent', _('Stacked Bar (%)')),
            ('line', _('Line')),
            ('pie', _('Pie')),
            ('dot', _('Dot')),
            ('table', _('Table')),
        ),
    )
    height = models.CharField(
        _('Height'),
        max_length=20,
        default='250',
        choices=(
            ('150', _('Short (150px)')),
            ('250', _('Average (250px)')),
            ('350', _('Tall (350px)')),
        ),
    )
    # applied by sort_values() unless set to 'none'
    sort_order = models.CharField(
        _('Sort data'),
        max_length=5,
        default='none',
        help_text=_('This setting only applies for one-dimensional charts.'),
        choices=(
            ('none', _('None')),
            ('alpha', _('Alphabetically')),
            ('asc', _('Increasing values')),
            ('desc', _('Decreasing values')),
        ),
    )
    # applied by hide_values()
    hide_null_values = models.BooleanField(
        default=False,
        verbose_name=_('Hide null values'),
        help_text=_('This setting only applies for one-dimensional charts.'),
    )

    manager_form_template = 'combo/chartngcell_form.html'

    class Meta:
        verbose_name = _('Chart')

    class Media:
        js = ('js/chartngcell.js',)
@classmethod
def is_enabled(cls):
    """Return a truthy value when a statistics source is configured.

    The 'bijoe' entry of the KNOWN_SERVICES setting is returned when it is
    truthy, the STATISTICS_PROVIDERS setting otherwise.
    """
    bijoe_services = settings.KNOWN_SERVICES.get('bijoe')
    if bijoe_services:
        return bijoe_services
    return settings.STATISTICS_PROVIDERS
def get_default_form_class(self):
    """Return the form class used to edit this cell."""
    # imported here rather than at module level
    from .forms import ChartNgForm as form_class

    return form_class
def get_additional_label(self):
    """Return the chart title (possibly empty)."""
    label = self.title
    return label
def is_relevant(self, context):
    """Return True only when a statistic is attached to the cell."""
    return True if self.statistic else False
def check_validity(self):
    """Update the validity information of the cell from its statistic URL.

    Nothing is checked without a statistic. A statistic without URL marks the
    cell as invalid; otherwise the data is requested and the response (or
    None when the request could not be made) is given to
    set_validity_from_url().
    """
    statistic = self.statistic
    if not statistic:
        return
    if not statistic.url:
        self.mark_as_invalid('missing_statistic_url')
        return

    try:
        response = self.get_statistic_data()
    except (RequestException, MissingRequest, MissingVariable):
        # best effort: validity is then computed from a missing response
        response = None

    self.set_validity_from_url(
        response,
        not_found_code='statistic_data_not_found',
        invalid_code='statistic_url_invalid',
    )
def get_statistic_data(self, raise_if_not_cached=False, invalidate_cache=False):
    """Request the statistic URL with the current filter parameters.

    Both arguments are forwarded unchanged to ``requests.get``; its return
    value is returned as is.
    """
    url = self.statistic.url
    request_options = {
        'params': self.get_filter_params(),
        'cache_duration': 300,
        'remote_service': 'auto',
        'without_user': True,
        'raise_if_not_cached': raise_if_not_cached,
        'log_errors': False,
        'invalidate_cache': invalidate_cache,
    }
    return requests.get(url, **request_options)
def get_chart(self, width=None, height=None, raise_if_not_cached=False):
    """Build the pygal chart for this cell.

    A refresh of the statistic data is scheduled for after the current
    transaction commits, then the data is fetched (an HTTP error status
    raises) and loaded into a chart of the configured type. Responses of the
    'bijoe' service go through parse_response(); other responses are read
    from ``response['data']`` ('x_labels' and 'series').
    """
    transaction.on_commit(lambda: spooler.refresh_statistics_data(cell_pk=self.pk))

    http_response = self.get_statistic_data(raise_if_not_cached)
    http_response.raise_for_status()
    payload = http_response.json()

    base_style = pygal.style.DefaultStyle(font_family='"Open Sans", sans-serif', background='transparent')
    chart_classes = {
        'bar': pygal.Bar,
        'horizontal-bar': pygal.HorizontalBar,
        'stacked-bar': pygal.StackedBar,
        'stacked-bar-percent': pygal.StackedBar,
        'line': pygal.Line,
        'pie': pygal.Pie,
        'dot': pygal.Dot,
        'table': pygal.Bar,
    }
    chart_class = chart_classes[self.chart_type]
    chart = chart_class(config=pygal.Config(style=copy.copy(base_style), order_min=0.1, max_scale=5))

    if self.statistic.service_slug == 'bijoe':
        x_labels, y_labels, values = self.parse_response(payload, chart)
        chart.x_labels = x_labels
        self.prepare_chart(chart, width, height)
        if chart.axis_count == 1:
            values = self.process_one_dimensional_data(chart, values)
        self.add_data_to_chart(chart, values, y_labels)
        return chart

    data = payload['data']
    interval = self.filter_params.get('time_interval', '')
    if data['x_labels'] and interval:
        if interval == 'day' or not self.statistic.has_native_support_for_interval(interval):
            # daily data, possibly requested in place of an unsupported interval
            self.aggregate_data(data, interval)
        elif interval == 'month':
            data['x_labels'] = [
                format_date(datetime.strptime(label, '%Y-%m'), 'M Y') for label in data['x_labels']
            ]

    chart.x_labels = data['x_labels']
    chart.axis_count = min(len(data['series']), 2)
    self.prepare_chart(chart, width, height)

    if chart.axis_count == 1:
        first_serie = data['series'][0]
        first_serie['data'] = self.process_one_dimensional_data(chart, first_serie['data'])
        # NOTE(review): SOURCE has lost its indentation; the pie split is
        # assumed to belong to the one-dimensional case -- confirm.
        if self.chart_type == 'pie':
            # one serie per non-null value
            data['series'] = [
                {'label': label, 'data': [value]}
                for label, value in zip(chart.x_labels, first_serie['data'])
                if value
            ]

    if self.chart_type == 'stacked-bar-percent':
        # converts the values in place
        self.make_percent([serie['data'] for serie in data['series']])

    for serie in data['series']:
        chart.add(serie['label'], serie['data'])
    return chart
def get_filter_params(self):
    """Return the query parameters to send to the statistic URL.

    They contain the non-empty filter values (page variables resolved), the
    'start' and 'end' bounds derived from ``time_range`` and, when the
    requested 'time_interval' is not natively supported by the statistic,
    'day' in its place.
    """
    params = {
        key: self.evaluate_filter_value(value) for key, value in self.filter_params.items() if value
    }
    today = timezone.now().date()
    time_range = self.time_range

    # offset, in years, of the first day of the period
    year_offsets = {'previous-year': -1, 'current-year': 0, 'next-year': 1}
    # (start, end) deltas applied to today
    relative_ranges = {
        'current-month': (relativedelta(day=1), relativedelta(day=1, months=1)),
        'previous-month': (relativedelta(day=1, months=-1), relativedelta(day=1)),
        'next-month': (relativedelta(day=1, months=1), relativedelta(day=1, months=2)),
        'current-week': (relativedelta(weekday=MO(-1)), relativedelta(weekday=MO(+1), days=+1)),
        'previous-week': (relativedelta(weekday=MO(-2)), relativedelta(weekday=MO(-1))),
        'next-week': (
            relativedelta(weekday=MO(+1), days=+1),
            relativedelta(weekday=MO(+2), days=+1),
        ),
    }

    if time_range in year_offsets:
        first_year = today.year + year_offsets[time_range]
        params['start'] = date(year=first_year, month=1, day=1)
        params['end'] = date(year=first_year + 1, month=1, day=1)
    elif time_range in relative_ranges:
        start_delta, end_delta = relative_ranges[time_range]
        params['start'] = today + start_delta
        params['end'] = today + end_delta
    elif time_range == 'range':
        if self.time_range_start:
            params['start'] = self.time_range_start
        if self.time_range_end:
            params['end'] = self.time_range_end
    elif time_range == 'range-template':
        if self.time_range_start_template:
            start = self.evaluate_range_template(self.time_range_start_template)
            if start:
                params['start'] = start
        if self.time_range_end_template:
            end = self.evaluate_range_template(self.time_range_end_template)
            if end:
                params['end'] = end

    requested_interval = params.get('time_interval')
    if 'time_interval' in params and not self.statistic.has_native_support_for_interval(
        requested_interval
    ):
        params['time_interval'] = 'day'
    return params
def evaluate_range_template(self, value):
    """Render a date expression as a 'Y-m-d' string.

    When ``value`` is the name of a page variable, the content of that
    variable (stripped from braces and spaces) is evaluated instead. 'now'
    and 'today' are added to the request context before rendering. None is
    returned when the template is invalid or a variable cannot be resolved.
    """
    try:
        value = self.page.extra_variables[value].strip('{ }')
    except KeyError:
        # not a page variable, evaluate the expression itself
        pass

    context = self.request_context
    context.update({'now': datetime.now, 'today': datetime.now})
    try:
        rendered = Template('{{ %s|date:"Y-m-d" }}' % value).render(context)
    except (VariableDoesNotExist, TemplateSyntaxError):
        return None
    return rendered
def evaluate_filter_value(self, value):
    """Resolve a filter value that may refer to a page variable.

    Values that are not strings (lists, numbers...) and strings that do not
    start with 'variable:' are returned unchanged. Otherwise the remainder of
    the string is the name of a page variable, whose content is rendered as a
    template with the request context.

    Raises MissingVariable when the page does not define the variable.
    """
    prefix = 'variable:'
    if not isinstance(value, str) or not value.startswith(prefix):
        return value

    # only drop the leading marker, the name itself is left untouched
    variable_name = value[len(prefix):]
    try:
        variable = self.page.extra_variables[variable_name]
    except KeyError as exc:
        raise MissingVariable(variable_name) from exc
    return Template(variable).render(self.request_context)
@cached_property
def request_context(self):
    """Return a RequestContext built from the request attached to the cell.

    The context is seeded with the request's ``extra_context`` attribute when
    there is one, and always exposes the request as 'request'.

    Raises MissingRequest when no request is attached (``_request``).
    """
    request = getattr(self, '_request', None)
    if not request:
        raise MissingRequest
    extra_context = getattr(request, 'extra_context', {})
    ctx = RequestContext(request, extra_context)
    ctx['request'] = request
    return ctx
def parse_response(self, response, chart):
    """Normalize a response into ``(x_labels, y_labels, data)``.

    Axes are normalized to have a fake axis when there are no dimensions and
    always an x axis when there is a single dimension. ``chart.axis_count``
    and ``chart.compute_sum`` are set, as well as the value formatter of the
    chart configuration when one applies.

    Raises UnsupportedDataSet for three dimensions or inconsistent sizes.
    """
    data = response['data']
    axis = response['axis']
    loop_labels = axis.get('loop') or []
    x_labels = axis.get('x_labels') or []
    y_labels = axis.get('y_labels') or []

    if loop_labels:
        if 'x_labels' in axis and 'y_labels' in axis:
            # no support for three dimensions
            raise UnsupportedDataSet()
        if y_labels:
            x_labels, y_labels = y_labels, loop_labels
        else:
            y_labels = loop_labels

    if x_labels and y_labels:
        sizes_match = len(y_labels) == len(data) and all([len(row) == len(x_labels) for row in data])
        if not sizes_match:
            # varying dimensions
            raise UnsupportedDataSet()

    if x_labels and y_labels:
        chart.axis_count = 2
    elif x_labels:
        y_labels = ['']
        chart.axis_count = 1
    elif y_labels:
        x_labels = y_labels
        y_labels = ['']
        chart.axis_count = 1
    else:  # unidata
        x_labels = ['']
        y_labels = ['']
        data = [data]
        chart.axis_count = 0

    chart.compute_sum = bool(response.get('measure') == 'integer' and chart.axis_count > 0)

    formatter = self.get_value_formatter(response.get('unit'), response.get('measure'))
    if formatter is not None:
        chart.config.value_formatter = formatter

    return x_labels, y_labels, data
def prepare_chart(self, chart, width, height):
    """Apply size, script, legend and colour settings to ``chart``.

    ``chart.axis_count`` and ``chart.x_labels`` must already be set.
    """
    config = chart.config
    config.margin = 0
    if width:
        config.width = width
    if height:
        config.height = height
    if width or height:
        config.explicit_size = True
    config.js = [os.path.join(settings.STATIC_URL, 'js/pygal-tooltips.js')]

    chart.show_legend = bool(chart.axis_count > 1)
    chart.truncate_legend = 30

    # matplotlib tab10 palette
    config.style.colors = (
        '#1f77b4',
        '#ff7f0e',
        '#2ca02c',
        '#d62728',
        '#9467bd',
        '#8c564b',
        '#e377c2',
        '#7f7f7f',
        '#bcbd22',
        '#17becf',
    )

    chart_type = self.chart_type
    is_narrow = width and width < 500

    if chart_type == 'dot':
        chart.show_legend = False
        # use a single colour for dots
        config.style.colors = ('#1f77b4',) * len(chart.x_labels)

    if chart_type == 'pie':
        chart.show_legend = True
        if is_narrow:
            chart.truncate_legend = 15
    else:
        if is_narrow:
            chart.legend_at_bottom = True
        if chart_type == 'horizontal-bar':
            # truncate labels
            chart.x_labels = [pygal.util.truncate(label, 15) for label in chart.x_labels]
def process_one_dimensional_data(self, chart, data):
    """Apply the one-dimensional options to ``data`` and return the result.

    In order: null values are hidden when requested, remaining values are
    sorted unless the sort order is 'none', and a total is appended for
    tables whose chart allows sums (``compute_sum``, true when unset).
    """
    if self.hide_null_values:
        data = self.hide_values(chart, data)

    if data:
        if self.sort_order != 'none':
            data = self.sort_values(chart, data)

    wants_total = getattr(chart, 'compute_sum', True) and self.chart_type == 'table'
    if wants_total:
        data = self.add_total_to_line_table(chart, data)
    return data
@staticmethod
def hide_values(chart, data):
    """Drop falsy values and their labels.

    ``chart.x_labels`` is replaced by the labels of the kept values, which
    are returned as a new list.
    """
    kept = [pair for pair in zip(chart.x_labels, data) if pair[1]]
    chart.x_labels = [pair[0] for pair in kept]
    return [pair[1] for pair in kept]
def sort_values(self, chart, data):
    """Sort values and their labels according to ``sort_order``.

    'alpha' sorts labels naturally (case-insensitive, runs of digits compared
    as numbers), 'asc' and 'desc' sort on values, None counting as 0.
    ``chart.x_labels`` is replaced and the sorted values are returned.
    """
    order = self.sort_order
    if order == 'alpha':

        def sort_key(item):
            chunks = re.split('([0-9]+)', item[0])
            return [int(chunk) if chunk.isdigit() else chunk.lower() for chunk in chunks]

        descending = False
    elif order == 'asc':

        def sort_key(item):
            return item[1] or 0

        descending = False
    elif order == 'desc':

        def sort_key(item):
            return item[1] or 0

        descending = True

    ordered_items = sorted(zip(chart.x_labels, data), key=sort_key, reverse=descending)
    sorted_labels, sorted_values = zip(*ordered_items)
    chart.x_labels = list(sorted_labels)
    return list(sorted_values)
@staticmethod
def add_total_to_line_table(chart, data):
    """Append a 'Total' entry to ``data`` and to the chart labels.

    The sum ignores None values. ``data`` is modified in place and returned.
    """
    # workaround pygal
    chart.compute_sum = False
    present_values = [value for value in data if value is not None]
    data.append(sum(present_values))
    chart.x_labels.append(gettext('Total'))
    return data
def add_data_to_chart(self, chart, data, y_labels):
    """Add the series to ``chart``.

    Pies get one serie per non-null value, to get different colours. Other
    charts get one serie per y label; for 'stacked-bar-percent' the values
    are then converted to percentages in place.
    """
    if self.chart_type == 'pie':
        for label, value in zip(chart.x_labels, data):
            if value:
                chart.add(label, value)
        return

    series_data = []
    for row_index, serie_label in enumerate(y_labels):
        if chart.axis_count >= 2:
            serie_values = [data[row_index][column] for column in range(len(chart.x_labels))]
        else:
            serie_values = data
        series_data.append(serie_values)
        chart.add(serie_label, serie_values)

    if self.chart_type == 'stacked-bar-percent':
        self.make_percent(series_data)
@staticmethod
def get_value_formatter(unit, measure):
    """Return a callable formatting chart values, or None.

    A duration formatter is returned when ``unit`` is 'seconds' or
    ``measure`` is 'duration', a percentage formatter when ``measure`` is
    'percent', and None in every other case. Both formatters display a
    missing value (None) as '-'.
    """
    if unit == 'seconds' or measure == 'duration':

        def format_duration(value):
            """Format a number of seconds as days and hours."""
            if value is None:
                return '-'
            days = value // 86400
            hours = (value % 86400) // 3600
            if days:
                days_string = ungettext('%d day', '%d days', days) % days
            if hours:
                hours_string = ungettext('%d hour', '%d hours', hours) % hours
            if days and hours:
                value = _('%(days_string)s and %(hours_string)s') % {
                    'days_string': days_string,
                    'hours_string': hours_string,
                }
            elif days:
                value = days_string
            elif hours:
                value = hours_string
            else:
                value = _('Less than an hour')
            return force_text(value)

        return format_duration

    if measure == 'percent':

        def format_percent(value):
            """Format a number with one decimal and a percent sign."""
            # same convention as the duration formatter for missing values,
            # formatting None with a float format would raise TypeError
            if value is None:
                return '-'
            return f'{value:.1f}%'

        return format_percent

    return None
def make_percent(self, series_data):
    """Convert values to percentages of their column total, in place.

    A column is made of the values found at the same index in every serie.
    None values are ignored and left untouched; columns whose total is zero
    are left as they are. Percentages are rounded to one decimal.
    """
    for index, column in enumerate(zip(*series_data)):
        total = sum(value for value in column if value is not None)
        if total == 0:
            continue
        factor = 100 / total
        for serie in series_data:
            current = serie[index]
            if current is not None:
                serie[index] = round(current * factor, 1)
@staticmethod
def aggregate_data(data, interval):
    """Aggregate daily data by ``interval``, in place.

    ``data['x_labels']`` holds 'YYYY-MM-DD' dates and each item of
    ``data['series']`` a 'data' list of the same length. Labels are rebuilt
    to cover the whole period, from the first to the last date, and the
    values of each serie are summed per label (None counting as 0).
    Handled intervals are 'day', 'week', 'month', 'year' and 'weekday'.
    """
    series_data = [serie['data'] for serie in data['series']]
    dates = [datetime.strptime(label, '%Y-%m-%d') for label in data['x_labels']]
    min_date, max_date = min(dates), max(dates)
    date_formats = {
        'day': 'd M Y',
        # Translators: This indicates week number followed by year, for example it can yield W2-2021.
        # First "W" is the first letter of the word "week" and should be translated accordingly, second
        # "W" and "o" are interpreted by Django's date filter and should be left as is. First W is
        # backslash escaped to prevent it from being interpreted, translators should refer to Django's
        # documentation in order to know if the new letter resulting of translation should be escaped or not.
        'week': gettext(r'\WW-o'),
        'month': 'M Y',
        'year': 'Y',
        'weekday': 'l',
    }

    if interval == 'day':
        day_count = (max_date - min_date).days + 1
        x_labels = [
            format_date(min_date + timedelta(days=offset), date_formats['day'])
            for offset in range(day_count)
        ]
    elif interval == 'month':
        month_count = max_date.month - min_date.month + (max_date.year - min_date.year) * 12 + 1
        x_labels = [
            format_date(min_date + relativedelta(months=offset), date_formats['month'])
            for offset in range(month_count)
        ]
    elif interval == 'year':
        x_labels = [str(year) for year in range(min_date.year, max_date.year + 1)]
    elif interval == 'weekday':
        x_labels = [str(name) for name in WEEKDAYS.values()]
    elif interval == 'week':
        x_labels = []
        cursor, last_date = min_date, max_date
        # stepping by whole weeks from the first date must still reach the
        # week of the last date
        if min_date.weekday() > max_date.weekday():
            last_date += relativedelta(weeks=1)
        while cursor <= last_date:
            x_labels.append(format_date(cursor, date_formats['week']))
            cursor += relativedelta(weeks=1)

    # label -> one running total per serie
    aggregates = OrderedDict((label, [0] * len(series_data)) for label in x_labels)
    for position, day in enumerate(dates):
        key = format_date(day, date_formats[interval])
        for serie_index, serie_values in enumerate(series_data):
            aggregates[key][serie_index] += serie_values[position] or 0

    data['x_labels'] = x_labels
    for serie_index, serie in enumerate(data['series']):
        serie['data'] = [totals[serie_index] for totals in aggregates.values()]
@property
def available_filters(self):
    """Filters of the statistic followed by the subfilters of the cell."""
    statistic_filters = self.statistic.filters
    return statistic_filters + self.subfilters
def update_subfilters(self):
    """Refresh the subfilters from the statistic data.

    The current request is attached to the cell, then the data is requested.
    Nothing happens when filter templates cannot be evaluated or when the
    response cannot be used. When the received subfilters differ from the
    stored ones they are stored, filter values that no longer match an
    available filter are dropped, and the cell is saved.
    """
    self._request = get_request()

    try:
        response = self.get_statistic_data()
    except (TemplateSyntaxError, VariableDoesNotExist):
        return

    try:
        response.raise_for_status()
        payload = response.json()['data']
    except Exception:
        # best effort: HTTP errors and malformed content are ignored
        return

    fresh_subfilters = payload.get('subfilters', [])
    # NOTE(review): SOURCE has lost its indentation; the clean-up and the
    # save are assumed to happen only when subfilters changed -- confirm.
    if self.subfilters == fresh_subfilters:
        return

    self.subfilters = fresh_subfilters
    known_ids = {filter_['id'] for filter_ in self.available_filters}
    self.filter_params = {key: value for key, value in self.filter_params.items() if key in known_ids}
    self.save()
def get_cache_key(self, filters_cell_id):
    """Return the cache key combining ``filters_cell_id`` and the cell pk."""
    return f'dataviz:{filters_cell_id!s}:{self.pk!s}'
@register_cell_class
class ChartFiltersCell(CellBase):
    """Cell displaying a filters form; at most one per page."""

    title = _('Filters')
    default_template_name = 'combo/chart-filters.html'
    max_one_by_page = True

    class Meta:
        verbose_name = _('Filters')

    @classmethod
    def is_enabled(cls):
        """Return a truthy value when the cell can be used.

        Requires both the CHART_FILTERS_CELL_ENABLED and the
        STATISTICS_PROVIDERS settings to be truthy.
        """
        cell_enabled = settings.CHART_FILTERS_CELL_ENABLED
        if not cell_enabled:
            return cell_enabled
        return settings.STATISTICS_PROVIDERS

    def get_cell_extra_context(self, context):
        """Extend the base cell context with the filters form as 'form'.

        The form is bound to the query string when it contains
        'filters_cell_id', and unbound otherwise.
        """
        from .forms import ChartFiltersForm

        ctx = super().get_cell_extra_context(context)
        query = context['request'].GET
        form_kwargs = {}
        if 'filters_cell_id' in query:  # detect refresh on submit
            form_kwargs['data'] = query
            form_kwargs['filters_cell_id'] = query['filters_cell_id']
        ctx['form'] = ChartFiltersForm(page=self.page, **form_kwargs)
        return ctx