combo/combo/apps/dataviz/models.py

805 lines
29 KiB
Python

# combo - content management system
# Copyright (C) 2014-2015 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import copy
import os
import re
from collections import OrderedDict
from datetime import date, datetime, timedelta
import pygal
import pygal.util
from dateutil.relativedelta import MO, relativedelta
from django.conf import settings
from django.db import models, transaction
from django.db.models import JSONField
from django.template import RequestContext, Template, TemplateSyntaxError, VariableDoesNotExist
from django.template.defaultfilters import date as format_date
from django.urls import reverse
from django.utils import timezone
from django.utils.dates import WEEKDAYS
from django.utils.functional import cached_property
from django.utils.timesince import timesince
from django.utils.translation import gettext
from django.utils.translation import gettext_lazy as _
from requests.exceptions import RequestException
from combo.data.library import register_cell_class
from combo.data.models import CellBase, django_template_validator
from combo.middleware import get_request
from combo.utils import get_templated_url, requests, spooler
class UnsupportedDataSet(Exception):
    """Raised by ChartNgCell.parse_response() for datasets it cannot chart.

    This covers responses with three dimensions and responses whose rows
    have varying lengths.
    """
class MissingRequest(Exception):
    """Raised when a request context is needed but no request is attached.

    See ChartNgCell.request_context, which raises it when the cell has no
    ``_request`` attribute set.
    """
class MissingVariable(Exception):
    """Raised when a filter value references an unknown page variable.

    See ChartNgCell.evaluate_filter_value(), which raises it when the name
    following ``variable:`` is absent from the page's extra variables.
    """
@register_cell_class
class Gauge(CellBase):
    """Gauge cell whose value comes from a data source URL.

    Depending on ``jsonp_data_source`` the template receives either the
    templated data source URL itself or the URL of the
    ``combo-ajax-gauge-count`` view for this cell.
    """

    title = models.CharField(_('Title'), max_length=150, blank=True, null=True)
    url = models.CharField(_('URL'), max_length=150, blank=True, null=True)
    data_source = models.CharField(_('Data Source'), max_length=150, blank=True, null=True)
    jsonp_data_source = models.BooleanField(_('Use JSONP to get data'), default=True)
    max_value = models.PositiveIntegerField(_('Max Value'), blank=True, null=True)

    default_template_name = 'combo/gauge-cell.html'

    class Media:
        js = ('js/gauge.min.js', 'js/combo.gauge.js')

    class Meta:
        verbose_name = _('Gauge')

    def get_additional_label(self):
        """Return the gauge title as additional label."""
        return self.title

    def is_relevant(self, context):
        """Tell whether a data source has been configured."""
        return True if self.data_source else False

    def get_cell_extra_context(self, context):
        """Return the template context describing the gauge."""
        if self.jsonp_data_source:
            source_url = get_templated_url(self.data_source)
        else:
            source_url = reverse('combo-ajax-gauge-count', kwargs={'cell': self.id})

        # the link is optional, only template it when it is set
        target_url = get_templated_url(self.url) if self.url else None

        return {
            'cell': self,
            'title': self.title,
            'url': target_url,
            'max_value': self.max_value,
            'data_source_url': source_url,
            'jsonp': self.jsonp_data_source,
        }
@register_cell_class
class ChartCell(CellBase):
    """Legacy chart cell exposing a title and a URL to its template."""

    default_template_name = 'combo/dataviz-chart.html'

    title = models.CharField(_('Title'), max_length=150, blank=True, null=True)
    url = models.URLField(_('URL'), max_length=250, blank=True, null=True)

    class Meta:
        verbose_name = _('Chart (legacy)')

    @classmethod
    def is_enabled(cls):
        """Tell whether legacy charts are enabled and a 'bijoe' service is known.

        The value returned is the first falsy operand met, or the 'bijoe'
        entry of ``settings.KNOWN_SERVICES``.
        """
        legacy_enabled = settings.LEGACY_CHART_CELL_ENABLED
        if not legacy_enabled:
            return legacy_enabled
        if not hasattr(settings, 'KNOWN_SERVICES'):
            return False
        return settings.KNOWN_SERVICES.get('bijoe')

    def get_default_form_class(self):
        """Return ChartForm, imported on demand from the forms module."""
        from .forms import ChartForm as form_class

        return form_class

    def get_additional_label(self):
        """Return the title, or an empty string when there is none."""
        return self.title or ''

    def get_cell_extra_context(self, context):
        """Extend the parent context with the chart title and URL."""
        extra_context = super().get_cell_extra_context(context)
        for key, value in (('title', self.title), ('url', self.url)):
            extra_context[key] = value
        return extra_context
class StatisticManager(models.Manager):
    """Manager resolving statistics from their natural key."""

    def get_by_natural_key(self, slug, site_slug, service_slug):
        """Return the statistic matching the key, creating it if needed."""
        lookup = {'slug': slug, 'site_slug': site_slug, 'service_slug': service_slug}
        statistic, _was_created = self.get_or_create(**lookup)
        return statistic
class Statistic(models.Model):
    """Description of a statistic whose data is available at ``url``.

    A statistic is identified by its (slug, site_slug, service_slug) triple,
    which is both its natural key and a uniqueness constraint.
    """

    slug = models.SlugField(_('Slug'), max_length=256)
    label = models.CharField(_('Label'), max_length=256)
    site_slug = models.SlugField(_('Site slug'), max_length=256)
    service_slug = models.SlugField(_('Service slug'), max_length=256)
    site_title = models.CharField(_('Site title'), max_length=256)
    url = models.URLField(_('Data URL'))
    # list of filter descriptions; each one carries an 'id' and usually a
    # list of 'options', themselves carrying an 'id'
    filters = JSONField(default=list)
    has_future_data = models.BooleanField(default=False)
    data_type = models.CharField(max_length=32)
    deprecated = models.BooleanField(default=False)
    available = models.BooleanField(_('Available data'), default=True)
    last_update = models.DateTimeField(_('Last update'), null=True, auto_now=True)

    objects = StatisticManager()

    class Meta:
        ordering = ['-available', 'deprecated', 'site_title', 'label']
        unique_together = ['slug', 'site_slug', 'service_slug']

    def __str__(self):
        """Return '<site>: <label>', flagged when unavailable or deprecated."""
        name = _('%(title)s: %(label)s') % {'title': self.site_title or self.site_slug, 'label': self.label}
        if not self.available:
            # unavailability takes precedence over deprecation
            name = _('%s (unavailable)') % name
        elif self.deprecated:
            name = _('%s (deprecated)') % name
        return name

    def natural_key(self):
        """Return the (slug, site_slug, service_slug) triple."""
        return (self.slug, self.site_slug, self.service_slug)

    def has_native_support_for_interval(self, time_interval):
        """Tell whether the 'time_interval' filter offers ``time_interval``.

        Only the filter whose id is 'time_interval' is inspected, so other
        filters are never required to carry an 'options' key.
        """
        # pylint: disable=not-an-iterable
        return any(
            time_interval == option['id']
            for filter_ in self.filters
            # test the id before touching the options: doing it the other way
            # round raises KeyError on unrelated filters lacking 'options'
            if filter_['id'] == 'time_interval'
            for option in filter_.get('options') or []
        )
# Choices offered by ChartNgCell.time_range, as (stored value, translated
# label) pairs.  ChartNgCell.get_filter_params() turns the selected value
# into 'start' and 'end' request parameters.
TIME_FILTERS = [
('previous-year', _('Previous year')),
('current-year', _('Current year')),
('next-year', _('Next year')),
('previous-month', _('Previous month')),
('current-month', _('Current month')),
('next-month', _('Next month')),
('previous-week', _('Previous week')),
('current-week', _('Current week')),
('next-week', _('Next week')),
('range', _('Free range (date)')),
('range-template', _('Free range (template)')),
]
@register_cell_class
class ChartNgCell(CellBase):
# Chart cell rendering, with pygal, the data fetched from its Statistic URL.
#
# The statistic is kept nullable with SET_NULL, so deleting a statistic
# leaves the cell in place; is_relevant() then returns False.
statistic = models.ForeignKey(
verbose_name=_('Data'),
to=Statistic,
blank=False,
null=True,
on_delete=models.SET_NULL,
related_name='cells',
help_text=_(
'This list may take a few seconds to be updated, please refresh the page if an item is missing.'
),
)
# additional filters read from the statistic response by update_subfilters()
subfilters = JSONField(default=list)
# selected filter values keyed by filter id; a value starting with
# 'variable:' refers to a page variable (see evaluate_filter_value())
filter_params = JSONField(default=dict)
title = models.CharField(_('Title'), max_length=150, blank=True)
time_range = models.CharField(
_('Shown period'),
max_length=20,
blank=True,
choices=TIME_FILTERS,
)
# bounds used by get_filter_params() when time_range is 'range'
time_range_start = models.DateField(_('From'), null=True, blank=True)
time_range_end = models.DateField(_('To'), null=True, blank=True)
# bounds used by get_filter_params() when time_range is 'range-template'
time_range_start_template = models.CharField(
_('From'),
max_length=200,
blank=True,
validators=[django_template_validator],
help_text=_(
'Template code returning a date. For example, Monday in two weeks would be '
'today|add_days:"14"|adjust_to_week_monday. Page variables are also accessible.'
),
)
time_range_end_template = models.CharField(
_('To'),
max_length=200,
blank=True,
validators=[django_template_validator],
)
# each value maps to a pygal chart class in get_chart()
chart_type = models.CharField(
_('Chart Type'),
max_length=20,
default='bar',
choices=(
('bar', _('Bar')),
('horizontal-bar', _('Horizontal Bar')),
('stacked-bar', _('Stacked Bar')),
('stacked-bar-percent', _('Stacked Bar (%)')),
('line', _('Line')),
('pie', _('Pie')),
('dot', _('Dot')),
('table', _('Table')),
('table-inverted', _('Table (inverted)')),
),
)
# stored as a string, converted with int() in configure_chart()
height = models.CharField(
_('Height'),
max_length=20,
default='250',
choices=(
('150', _('Short (150px)')),
('250', _('Average (250px)')),
('350', _('Tall (350px)')),
),
)
# applied by process_one_dimensional_data() through sort_values()
sort_order = models.CharField(
_('Sort data'),
max_length=5,
default='none',
help_text=_('This setting only applies for one-dimensional charts.'),
choices=(
('none', _('None')),
('alpha', _('Alphabetically')),
('asc', _('Increasing values')),
('desc', _('Decreasing values')),
),
)
# applied by process_one_dimensional_data() through hide_values()
hide_null_values = models.BooleanField(
default=False,
verbose_name=_('Hide null values'),
help_text=_('This setting only applies for one-dimensional charts.'),
)
manager_form_template = 'combo/chartngcell_form.html'
# codes handed to mark_as_invalid() and set_validity_from_url() by
# check_validity(), with their translated descriptions
invalid_reason_codes = {
'missing_statistic_url': _('No statistic URL set'),
'statistic_data_not_found': _('Statistic URL seems to unexist'),
'statistic_url_invalid': _('Statistic URL seems to be invalid'),
}
class Meta:
verbose_name = _('Chart')
class Media:
js = ('js/chartngcell.js',)
@classmethod
def is_enabled(cls):
    """Tell whether a 'bijoe' service or statistics providers are configured.

    Returns the 'bijoe' entry of ``settings.KNOWN_SERVICES`` when it is
    truthy, ``settings.STATISTICS_PROVIDERS`` otherwise.
    """
    bijoe_service = settings.KNOWN_SERVICES.get('bijoe')
    if bijoe_service:
        return bijoe_service
    return settings.STATISTICS_PROVIDERS
def get_default_form_class(self):
    """Return ChartNgForm, imported on demand from the forms module."""
    from .forms import ChartNgForm as form_class

    return form_class
def get_additional_label(self):
    """Return the chart title as additional label."""
    label = self.title
    return label
def is_relevant(self, context):
    """Tell whether a statistic is attached to the cell."""
    return True if self.statistic else False
def check_validity(self):
    """Update the validity of the cell from its statistic.

    Nothing is done when no statistic is attached.  A statistic without URL
    marks the cell invalid with 'missing_statistic_url'.  Otherwise the data
    is fetched and the response, or None when fetching failed, is handed
    over to set_validity_from_url().
    """
    statistic = self.statistic
    if not statistic:
        return

    if not statistic.url:
        self.mark_as_invalid('missing_statistic_url')
        return

    try:
        response = self.get_statistic_data()
    except (RequestException, MissingRequest, MissingVariable):
        # no usable response, let set_validity_from_url() deal with None
        response = None

    self.set_validity_from_url(
        response,
        not_found_code='statistic_data_not_found',
        invalid_code='statistic_url_invalid',
    )
def get_statistic_data(self, filter_params=None, raise_if_not_cached=False, invalidate_cache=False):
    """Request the statistic URL and return the response.

    ``filter_params`` are sent as query parameters; when they are empty or
    not given, the result of get_filter_params() is used instead.  The two
    remaining arguments are forwarded as is to ``requests.get()``.
    """
    url = self.statistic.url
    query = filter_params or self.get_filter_params()
    request_options = {
        'cache_duration': 300,
        'remote_service': 'auto',
        'without_user': True,
        'raise_if_not_cached': raise_if_not_cached,
        'log_errors': False,
        'invalidate_cache': invalidate_cache,
    }
    return requests.get(url, params=query, **request_options)
def get_chart(self, width=None, height=None, raise_if_not_cached=False):
# Fetch the statistic data and return the pygal chart matching
# self.chart_type, filled with the data and configured for the given size.
# HTTP errors are propagated by response.raise_for_status().
filter_params = self.get_filter_params()
# ask the spooler to refresh the statistics data for these parameters once
# the current transaction is committed
transaction.on_commit(
lambda: spooler.refresh_statistics_data(cell_pk=self.pk, filter_params=filter_params)
)
response = self.get_statistic_data(filter_params, raise_if_not_cached)
response.raise_for_status()
response = response.json()
style = pygal.style.DefaultStyle(font_family='"Open Sans", sans-serif', background='transparent')
# both table types are built from a pygal.Bar and 'stacked-bar-percent'
# from a pygal.StackedBar
chart = {
'bar': pygal.Bar,
'horizontal-bar': pygal.HorizontalBar,
'stacked-bar': pygal.StackedBar,
'stacked-bar-percent': pygal.StackedBar,
'line': pygal.Line,
'pie': pygal.Pie,
'dot': pygal.Dot,
'table': pygal.Bar,
'table-inverted': pygal.Bar,
}[self.chart_type](config=pygal.Config(style=copy.copy(style), order_min=0.1, max_scale=5))
if self.statistic.service_slug == 'bijoe':
# bijoe responses are normalised by parse_response()
x_labels, y_labels, data = self.parse_response(response, chart)
chart.x_labels = x_labels
if chart.axis_count == 1:
data = self.process_one_dimensional_data(chart, data)
self.add_data_to_chart(chart, data, y_labels)
else:
# other services: response['data'] holds 'x_labels' and a list of 'series',
# each serie being a dict with a 'label' and a 'data' list
data = response['data']
interval = self.filter_params.get('time_interval', '')
if data['x_labels'] and interval:
# aggregate locally for the 'day' interval and for intervals the statistic
# does not support natively
if interval == 'day' or not self.statistic.has_native_support_for_interval(interval):
self.aggregate_data(data, interval)
elif interval == 'month':
# labels are parsed as YYYY-MM and reformatted for display
data['x_labels'] = [
format_date(datetime.strptime(x, '%Y-%m'), 'M Y') for x in data['x_labels']
]
chart.x_labels = data['x_labels']
# 0 without serie, 1 for a single serie, 2 for anything more
chart.axis_count = min(len(data['series']), 2)
if self.statistic.data_type:
chart.config.value_formatter = self.get_value_formatter(self.statistic.data_type)
chart.compute_sum = False
if chart.axis_count == 1:
data['series'][0]['data'] = self.process_one_dimensional_data(
chart, data['series'][0]['data']
)
if self.chart_type == 'pie':
# one serie per non-empty value; inside the comprehension `data` is the
# loop variable, while the iterable and the assignment target still refer
# to the outer dict
data["series"] = [
{"label": label, "data": [data]}
for label, data in zip(chart.x_labels, data["series"][0]["data"])
if data
]
if self.chart_type == 'stacked-bar-percent':
# make_percent() rewrites the series' lists in place
self.make_percent([serie['data'] for serie in data['series']])
for serie in data['series']:
chart.add(serie['label'], serie['data'])
self.configure_chart(chart, width, height)
return chart
def get_filter_params(self):
    """Return the parameters to send along with the statistic request.

    Non-empty filter values are evaluated with evaluate_filter_value(), then
    'start' and 'end' are derived from ``time_range``.  For the predefined
    periods 'end' is the first day following the period.  Lastly, a
    'time_interval' the statistic does not support natively is replaced by
    'day'.
    """
    query = {
        name: self.evaluate_filter_value(raw_value)
        for name, raw_value in self.filter_params.items()
        if raw_value
    }
    today = timezone.now().date()
    period = self.time_range

    def january_first(year_offset):
        return date(year=today.year + year_offset, month=1, day=1)

    # (start, end) of each predefined period, computed only when selected
    predefined_periods = {
        'current-year': lambda: (january_first(0), january_first(1)),
        'previous-year': lambda: (january_first(-1), january_first(0)),
        'next-year': lambda: (january_first(1), january_first(2)),
        'current-month': lambda: (today.replace(day=1), today + relativedelta(day=1, months=1)),
        'previous-month': lambda: (today + relativedelta(day=1, months=-1), today.replace(day=1)),
        'next-month': lambda: (
            today + relativedelta(day=1, months=1),
            today + relativedelta(day=1, months=2),
        ),
        'current-week': lambda: (
            today + relativedelta(weekday=MO(-1)),
            today + relativedelta(weekday=MO(+1), days=+1),
        ),
        'previous-week': lambda: (
            today + relativedelta(weekday=MO(-2)),
            today + relativedelta(weekday=MO(-1)),
        ),
        'next-week': lambda: (
            today + relativedelta(weekday=MO(+1), days=+1),
            today + relativedelta(weekday=MO(+2), days=+1),
        ),
    }

    if period in predefined_periods:
        query['start'], query['end'] = predefined_periods[period]()
    elif period == 'range':
        if self.time_range_start:
            query['start'] = self.time_range_start
        if self.time_range_end:
            query['end'] = self.time_range_end
    elif period == 'range-template':
        # a bound is only set when its template renders to something
        if self.time_range_start_template:
            rendered_start = self.evaluate_range_template(self.time_range_start_template)
            if rendered_start:
                query['start'] = rendered_start
        if self.time_range_end_template:
            rendered_end = self.evaluate_range_template(self.time_range_end_template)
            if rendered_end:
                query['end'] = rendered_end

    if 'time_interval' in query:
        if not self.statistic.has_native_support_for_interval(query['time_interval']):
            # daily data is requested instead, get_chart() aggregates it
            query['time_interval'] = 'day'
    return query
def evaluate_range_template(self, value):
    """Render ``value`` as a template expression formatted as Y-m-d.

    When ``value`` is the name of a page variable, the variable's content,
    stripped from surrounding braces and spaces, is used as the expression.
    ``now`` and ``today`` are added to the request context before rendering.
    Returns None when the expression cannot be compiled or resolved.
    """
    page_variables = self.page.extra_variables
    if value in page_variables:
        value = page_variables[value].strip('{ }')

    render_context = self.request_context
    render_context.update({'now': datetime.now, 'today': datetime.now})

    template_source = '{{ %s|date:"Y-m-d" }}' % value
    try:
        rendered = Template(template_source).render(render_context)
    except (VariableDoesNotExist, TemplateSyntaxError):
        return None
    return rendered
def evaluate_filter_value(self, value):
    """Return the effective value of a filter parameter.

    Lists and strings not starting with 'variable:' are returned as is.
    Otherwise the referenced page variable is rendered as a template with
    the request context.

    Raises MissingVariable when the page has no such variable.
    """
    if isinstance(value, list):
        return value
    if not value.startswith('variable:'):
        return value

    variable_name = value.replace('variable:', '')
    try:
        variable = self.page.extra_variables[variable_name]
    except KeyError:
        raise MissingVariable
    return Template(variable).render(self.request_context)
@cached_property
def request_context(self):
    """Return a RequestContext built from the request attached to the cell.

    The context is seeded with the request's ``extra_context`` attribute,
    when it has one, and exposes the request under the 'request' key.

    Raises MissingRequest when no request is set in ``self._request``.
    """
    request = getattr(self, '_request', None)
    if not request:
        raise MissingRequest
    extra_context = getattr(request, 'extra_context', {})
    context = RequestContext(request, extra_context)
    context['request'] = request
    return context
def parse_response(self, response, chart):
# Return (x_labels, y_labels, data) from a response providing 'data' and
# 'axis' entries.  Sets chart.axis_count and chart.compute_sum, and
# chart.config.value_formatter when a formatter applies.
# Raises UnsupportedDataSet for datasets that cannot be charted.
#
# normalize axis to have a fake axis when there are no dimensions and
# always a x axis when there is a single dimension.
data = response['data']
loop_labels = response['axis'].get('loop') or []
x_labels = response['axis'].get('x_labels') or []
y_labels = response['axis'].get('y_labels') or []
if loop_labels:
if 'x_labels' in response['axis'] and 'y_labels' in response['axis']:
# no support for three dimensions
raise UnsupportedDataSet()
# the loop axis becomes the y axis; an existing y axis is moved to x
if not y_labels:
y_labels = loop_labels
else:
x_labels, y_labels = y_labels, loop_labels
if (
x_labels
and y_labels
and (len(y_labels) != len(data) or not all([len(x) == len(x_labels) for x in data]))
):
# varying dimensions
raise UnsupportedDataSet()
if not x_labels and not y_labels: # unidata
x_labels = ['']
y_labels = ['']
data = [data]
chart.axis_count = 0
elif not x_labels:
x_labels = y_labels
y_labels = ['']
chart.axis_count = 1
elif not y_labels:
y_labels = ['']
chart.axis_count = 1
else:
chart.axis_count = 2
# compute_sum is enabled only for 'integer' measures on charts having at
# least one axis
chart.compute_sum = bool(response.get('measure') == 'integer' and chart.axis_count > 0)
formatter = self.get_value_formatter(response.get('unit'), response.get('measure'))
if formatter is not None:
chart.config.value_formatter = formatter
return x_labels, y_labels, data
def configure_dot_chart(self, chart, width, height):
    """Hide the legend of a dot chart and draw all its dots in one colour."""
    chart.show_legend = False
    # use a single colour for dots, repeated once per label (at least once)
    colour_count = max(len(chart.x_labels), 1)
    chart.config.style.colors = ('#1f77b4',) * colour_count
def configure_horizontal_bar_chart(self, chart, width, height):
    """Truncate the labels to 15 characters on charts narrower than 500."""
    is_narrow = width and width < 500
    if not is_narrow:
        return
    # truncate labels
    chart.x_labels = [pygal.util.truncate(label, 15) for label in chart.x_labels]
def configure_pie_chart(self, chart, width, height):
    """Show the legend of a pie chart and size it from the given width."""
    chart.show_legend = True
    if not width:
        # unknown width, keep the current legend truncation
        return
    if height:
        # pies are as tall as wide, reserve the appropriate space and
        # distribute the rest for the legend.
        chart.truncate_legend = (width - height) // 10
    else:
        chart.truncate_legend = width // 20
def configure_chart(self, chart, width, height):
# Apply the sizing, legend, tooltips script and colour palette common to all
# charts, then call the configure_<chart type>_chart() method matching
# self.chart_type when the cell has one.
auto_height_scale = pygal.style.DefaultStyle.legend_font_size * 1.75
chart.config.margin = 0
if width:
chart.config.width = width
# fall back to the height configured on the cell, which is stored as a string
height = height or int(self.height)
# adapt chart's height to legend length
chart.config.height = max(height, auto_height_scale * len(chart.raw_series))
if width or height:
chart.config.explicit_size = True
chart.config.js = [os.path.join(settings.STATIC_URL, 'js/pygal-tooltips.js')]
# default legend settings, the chart type specific method may override them
chart.show_legend = bool(chart.axis_count > 1)
chart.truncate_legend = 30
# matplotlib tab10 palette
chart.config.style.colors = (
'#1f77b4',
'#ff7f0e',
'#2ca02c',
'#d62728',
'#9467bd',
'#8c564b',
'#e377c2',
'#7f7f7f',
'#bcbd22',
'#17becf',
)
# dashes are turned into underscores, 'horizontal-bar' thus maps to
# configure_horizontal_bar_chart
custom_configure_method_name = 'configure_%s_chart' % self.chart_type.replace('-', '_')
if hasattr(self, custom_configure_method_name):
getattr(self, custom_configure_method_name)(chart, width, height)
if self.chart_type != 'pie':
if width and width < 500:
chart.legend_at_bottom = True
# restore demanded chart's height
chart.config.height = height
def process_one_dimensional_data(self, chart, data):
    """Apply the one-dimensional settings of the cell to ``data``.

    In order: null values are hidden when requested, remaining values are
    sorted unless the sort order is 'none', and a total is appended for
    table charts unless ``chart.compute_sum`` is set to a false value.
    Returns the resulting data; ``chart.x_labels`` is kept in step by the
    helpers being called.
    """
    if self.hide_null_values:
        data = self.hide_values(chart, data)

    needs_sorting = data and self.sort_order != 'none'
    if needs_sorting:
        data = self.sort_values(chart, data)

    # charts without a compute_sum attribute get a total
    if getattr(chart, 'compute_sum', True):
        if self.chart_type in ('table', 'table-inverted'):
            data = self.add_total_to_line_table(chart, data)
    return data
@staticmethod
def hide_values(chart, data):
    """Return ``data`` without its falsy values.

    ``chart.x_labels`` is replaced by the list of labels of the values being
    kept.
    """
    kept_pairs = [(label, value) for label, value in zip(chart.x_labels, data) if value]
    chart.x_labels = [label for label, _value in kept_pairs]
    return [value for _label, value in kept_pairs]
def sort_values(self, chart, data):
    """Return ``data`` sorted according to ``self.sort_order``.

    ``chart.x_labels`` is reordered the same way.  Supported orders are
    'alpha' (natural, case insensitive sort on labels), 'asc' and 'desc'
    (sort on values, None counting as 0).  With any other order, or when
    there is nothing to sort, ``data`` is returned untouched and the labels
    are left alone.
    """
    pairs = zip(chart.x_labels, data)
    if self.sort_order == 'alpha':
        digit_re = re.compile('([0-9]+)')

        def natural_sort_key(item):
            # Splitting on a capturing group alternates text and digit runs,
            # digit runs always landing on odd indexes.  Relying on the
            # position rather than on str.isdigit() keeps int() away from
            # characters such as '²', which are digits for isdigit() but
            # are not matched by the pattern and cannot be converted.
            return [
                int(part) if index % 2 else part.lower()
                for index, part in enumerate(digit_re.split(str(item[0])))
            ]

        ordered = sorted(pairs, key=natural_sort_key)
    elif self.sort_order == 'asc':
        ordered = sorted(pairs, key=lambda pair: (pair[1] or 0))
    elif self.sort_order == 'desc':
        ordered = sorted(pairs, key=lambda pair: (pair[1] or 0), reverse=True)
    else:
        # unknown order, there is nothing to do
        return data

    if not ordered:
        # zip(*[]) cannot be unpacked into labels and values
        return data

    x_labels, sorted_data = zip(*ordered)
    chart.x_labels = list(x_labels)
    return list(sorted_data)
@staticmethod
def add_total_to_line_table(chart, data):
    """Append the sum of ``data`` to it, along with a 'Total' label.

    ``data`` and ``chart.x_labels`` are modified in place and ``data`` is
    returned.  None values are left out of the sum.  Nothing is appended
    when there is at most one value.
    """
    # workaround pygal
    chart.compute_sum = False

    # do not add total for single point
    if len(data) > 1:
        total = sum(value for value in data if value is not None)
        data.append(total)
        chart.x_labels.append(gettext('Total'))
    return data
def add_data_to_chart(self, chart, data, y_labels):
    """Add ``data`` to ``chart`` as one or several series.

    Pie charts get one serie per non-empty value, named after its label.
    Other charts get one serie per item of ``y_labels``: ``data`` itself
    when the chart has fewer than two axes, the matching row of ``data``
    otherwise.  For 'stacked-bar-percent' charts the series are then
    converted to percentages in place.
    """
    if self.chart_type == 'pie':
        # pie, create a serie by data, to get different colours
        for label, value in zip(chart.x_labels, data):
            if value:
                chart.add(label, value)
        return

    series_data = []
    for row, serie_label in enumerate(y_labels):
        if chart.axis_count < 2:
            values = data
        else:
            values = [data[row][column] for column in range(len(chart.x_labels))]
        series_data.append(values)
        chart.add(serie_label, values)

    if self.chart_type == 'stacked-bar-percent':
        self.make_percent(series_data)
@staticmethod
def get_value_formatter(unit, measure='duration'):
    """Return a function formatting chart values, or None.

    A duration formatter, expressing a number of seconds with timesince(),
    is returned when ``unit`` is 'seconds' or ``measure`` is 'duration'.  A
    formatter producing percentages with one decimal is returned when
    ``measure`` is 'percent'.  None is returned in all other cases.
    """
    # NOTE(review): measure defaults to 'duration', hence a call giving only
    # `unit` always gets the duration formatter - confirm this is intended.
    if unit == 'seconds' or measure == 'duration':
        base_date = datetime(1871, 3, 18)

        def format_duration(value):
            # durations are rendered as the time elapsed since a fixed date
            return timesince(base_date, base_date + timedelta(seconds=value))

        return format_duration

    if measure == 'percent':

        def percent_formatter(x):
            return f'{x:.1f}%'

        return percent_formatter

    return None
def make_percent(self, series_data):
    """Convert, in place, the values of the series to percentages.

    For each position, every value is replaced by its share of the sum of
    the values found at that position across all series, rounded to one
    decimal.  None values are ignored and positions summing to zero are left
    untouched.
    """
    for position, column in enumerate(zip(*series_data)):
        column_total = sum(value for value in column if value is not None)
        if column_total == 0:
            continue
        factor = 100 / column_total
        for serie in series_data:
            current = serie[position]
            if current is not None:
                serie[position] = round(current * factor, 1)
@staticmethod
def aggregate_data(data, interval):
    """Aggregate, in place, daily series of ``data`` by ``interval``.

    ``data['x_labels']`` must hold dates formatted as YYYY-MM-DD.  It is
    replaced by the labels of the intervals going from the earliest to the
    latest date (or the weekday names for the 'weekday' interval), and the
    'data' of every serie is replaced by the sums of its values over each
    interval, None values counting as 0.
    """
    series_values = [serie['data'] for serie in data['series']]
    parsed_dates = [datetime.strptime(label, '%Y-%m-%d') for label in data['x_labels']]
    first_date, last_date = min(parsed_dates), max(parsed_dates)

    date_formats = {
        'day': 'd M Y',
        # Translators: This indicates week number followed by year, for example it can yield W2-2021.
        # First "W" is the first letter of the word "week" and should be translated accordingly, second
        # "W" and "o" are interpreted by Django's date filter and should be left as is. First W is
        # backslash escaped to prevent it from being interpreted, translators should refer to Django's
        # documentation in order to know if the new letter resulting of translation should be escaped or not.
        'week': gettext(r'\WW-o'),
        'month': 'M Y',
        'year': 'Y',
        'weekday': 'l',
    }

    if interval == 'day':
        day_count = (last_date - first_date).days + 1
        x_labels = [
            format_date(first_date + timedelta(days=offset), date_formats['day'])
            for offset in range(day_count)
        ]
    elif interval == 'month':
        month_difference = last_date.month - first_date.month + (last_date.year - first_date.year) * 12
        x_labels = [
            format_date(first_date + relativedelta(months=offset), date_formats['month'])
            for offset in range(month_difference + 1)
        ]
    elif interval == 'year':
        x_labels = [str(year) for year in range(first_date.year, last_date.year + 1)]
    elif interval == 'weekday':
        x_labels = [str(label) for label in WEEKDAYS.values()]
    elif interval == 'week':
        x_labels = []
        week_cursor, week_limit = first_date, last_date
        # extend the limit by a week when the first date falls later in its
        # week than the last date does
        if first_date.weekday() > last_date.weekday():
            week_limit += relativedelta(weeks=1)
        while week_cursor <= week_limit:
            x_labels.append(format_date(week_cursor, date_formats['week']))
            week_cursor += relativedelta(weeks=1)

    # one list of sums per label, holding one sum per serie
    aggregates = OrderedDict((label, [0] * len(series_values)) for label in x_labels)
    for position, current_date in enumerate(parsed_dates):
        key = format_date(current_date, date_formats[interval])
        for serie_index, values in enumerate(series_values):
            aggregates[key][serie_index] += values[position] or 0

    data['x_labels'] = x_labels
    for serie_index, serie in enumerate(data['series']):
        serie['data'] = [sums[serie_index] for sums in aggregates.values()]
@property
def available_filters(self):
    """Filters of the statistic followed by the subfilters of the cell."""
    statistic_filters = self.statistic.filters
    return statistic_filters + self.subfilters
def update_subfilters(self, filter_params=None):
# Refresh self.subfilters from the 'subfilters' entry of the statistic data.
# The method returns without changing anything when the data cannot be
# fetched, is an error response or cannot be decoded.
#
# request_context reads the request from self._request
self._request = get_request()
try:
response = self.get_statistic_data(filter_params=filter_params)
except (TemplateSyntaxError, VariableDoesNotExist):
return
try:
response.raise_for_status()
data = response.json()['data']
except Exception:
# best effort, whatever went wrong the cell is left as it is
return
new_subfilters = data.get('subfilters', [])
if self.subfilters != new_subfilters:
self.subfilters = new_subfilters
# only keep the parameters matching a filter that is still available
subfilter_ids = {filter_['id'] for filter_ in self.available_filters}
self.filter_params = {k: v for k, v in self.filter_params.items() if k in subfilter_ids}
self.save()
def get_cache_key(self, filters_cell_id):
    """Return the cache key of this cell for the given filters cell id."""
    return f'dataviz:{filters_cell_id}:{self.pk}'
@register_cell_class
class ChartFiltersCell(CellBase):
    """Cell displaying a ChartFiltersForm, limited to one per page."""

    filters = JSONField(_('Filters'), default=dict)

    title = _('Filters')
    default_template_name = 'combo/chart-filters.html'
    manager_form_template = 'combo/chartfilterscell_form.html'
    max_one_by_page = True

    class Meta:
        verbose_name = _('Filters')

    class Media:
        js = ('js/combo.multiselectwidget.js',)
        css = {'all': ('css/combo.multiselectwidget.css',)}

    @classmethod
    def is_enabled(cls):
        """Tell whether the cell is enabled and statistics providers exist.

        Returns ``settings.CHART_FILTERS_CELL_ENABLED`` when it is falsy,
        ``settings.STATISTICS_PROVIDERS`` otherwise.
        """
        cell_enabled = settings.CHART_FILTERS_CELL_ENABLED
        if not cell_enabled:
            return cell_enabled
        return settings.STATISTICS_PROVIDERS

    def get_cell_extra_context(self, context):
        """Add the filters form to the parent context.

        The form is bound to the query string when it carries a
        'filters_cell_id' parameter, and left unbound otherwise.
        """
        from .forms import ChartFiltersForm

        ctx = super().get_cell_extra_context(context)
        form_kwargs = {'page': self.page, 'filters_cell': self}
        query = context['request'].GET
        if 'filters_cell_id' in query:  # detect refresh on submit
            form_kwargs['data'] = query
            form_kwargs['filters_cell_id'] = query['filters_cell_id']
        ctx['form'] = ChartFiltersForm(**form_kwargs)
        return ctx

    def get_default_form_class(self):
        """Return ChartFiltersConfigForm, imported on demand."""
        from .forms import ChartFiltersConfigForm as form_class

        return form_class