# combo - content management system
# Copyright (C) 2014-2015  Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import copy
import os
import re
from collections import OrderedDict
from datetime import date, datetime, timedelta

import pygal
import pygal.util
from dateutil.relativedelta import MO, relativedelta
from django.conf import settings
from django.db import models, transaction
from django.db.models import JSONField
from django.template import RequestContext, Template, TemplateSyntaxError, VariableDoesNotExist
from django.template.defaultfilters import date as format_date
from django.urls import reverse
from django.utils import timezone
from django.utils.dates import WEEKDAYS
from django.utils.functional import cached_property
from django.utils.timesince import timesince
from django.utils.translation import gettext
from django.utils.translation import gettext_lazy as _
from requests.exceptions import RequestException

from combo.data.library import register_cell_class
from combo.data.models import CellBase, django_template_validator
from combo.middleware import get_request
from combo.utils import get_templated_url, requests, spooler


class UnsupportedDataSet(Exception):
    """Raised by ChartNgCell.parse_response() when the data cannot be charted."""


class MissingRequest(Exception):
    """Raised when a request context is needed but the cell has no `_request`."""


class MissingVariable(Exception):
    """Raised when a `variable:<name>` filter value names an unknown page variable."""


@register_cell_class
class Gauge(CellBase):
    """Cell displaying a gauge fed by a data source URL."""

    title = models.CharField(_('Title'), max_length=150, blank=True, null=True)
    url = models.CharField(_('URL'), max_length=150, blank=True, null=True)
    data_source = models.CharField(_('Data Source'), max_length=150, blank=True, null=True)
    jsonp_data_source = models.BooleanField(_('Use JSONP to get data'), default=True)
    max_value = models.PositiveIntegerField(_('Max Value'), blank=True, null=True)

    default_template_name = 'combo/gauge-cell.html'

    class Media:
        js = ('js/gauge.min.js', 'js/combo.gauge.js')

    class Meta:
        verbose_name = _('Gauge')

    def get_additional_label(self):
        """Return the cell title as its additional label."""
        return self.title

    def is_relevant(self, context):
        """Return True only when a data source is configured."""
        return bool(self.data_source)

    def get_cell_extra_context(self, context):
        """Build the template context for the gauge.

        With JSONP the data source URL is used directly (after templating);
        otherwise the URL of the `combo-ajax-gauge-count` view for this cell
        is used.
        """
        if self.jsonp_data_source:
            data_source_url = get_templated_url(self.data_source)
        else:
            data_source_url = reverse('combo-ajax-gauge-count', kwargs={'cell': self.id})
        return {
            'cell': self,
            'title': self.title,
            'url': get_templated_url(self.url) if self.url else None,
            'max_value': self.max_value,
            'data_source_url': data_source_url,
            'jsonp': self.jsonp_data_source,
        }


@register_cell_class
class ChartCell(CellBase):
    """Legacy chart cell, rendering the chart found at a fixed URL."""

    default_template_name = 'combo/dataviz-chart.html'

    title = models.CharField(_('Title'), max_length=150, blank=True, null=True)
    url = models.URLField(_('URL'), max_length=250, blank=True, null=True)

    class Meta:
        verbose_name = _('Chart (legacy)')

    @classmethod
    def is_enabled(cls):
        """Return a truthy value when the legacy cell is enabled and a 'bijoe' service is known."""
        return (
            settings.LEGACY_CHART_CELL_ENABLED
            and hasattr(settings, 'KNOWN_SERVICES')
            and settings.KNOWN_SERVICES.get('bijoe')
        )

    def get_default_form_class(self):
        """Return the form class used to edit this cell."""
        # imported here rather than at module level, as in the other cells of this module
        from .forms import ChartForm

        return ChartForm

    def get_additional_label(self):
        """Return the cell title, or an empty string when there is none."""
        return self.title or ''

    def get_cell_extra_context(self, context):
        """Extend the parent context with the cell title and URL."""
        context = super().get_cell_extra_context(context)
        context['title'] = self.title
        context['url'] = self.url
        return context


class StatisticManager(models.Manager):
    """Manager resolving statistics by natural key."""

    def get_by_natural_key(self, slug, site_slug, service_slug):
        """Return the statistic matching the natural key, creating it if missing."""
        return self.get_or_create(slug=slug, site_slug=site_slug, service_slug=service_slug)[0]


class Statistic(models.Model):
    """Description of a remote statistic that chart cells can display."""

    slug = models.SlugField(_('Slug'), max_length=256)
    label = models.CharField(_('Label'), max_length=256)
    site_slug = models.SlugField(_('Site slug'), max_length=256)
    service_slug = models.SlugField(_('Service slug'), max_length=256)
    site_title = models.CharField(_('Site title'), max_length=256)
    url = models.URLField(_('Data URL'))
    filters = JSONField(default=list)
    has_future_data = models.BooleanField(default=False)
    data_type = models.CharField(max_length=32)
    deprecated = models.BooleanField(default=False)
    available = models.BooleanField(_('Available data'), default=True)
    last_update = models.DateTimeField(_('Last update'), null=True, auto_now=True)

    objects = StatisticManager()

    class Meta:
        ordering = ['-available', 'deprecated', 'site_title', 'label']
        unique_together = ['slug', 'site_slug', 'service_slug']

    def __str__(self):
        name = _('%(title)s: %(label)s') % {'title': self.site_title or self.site_slug, 'label': self.label}
        if not self.available:
            name = _('%s (unavailable)') % name
        elif self.deprecated:
            name = _('%s (deprecated)') % name
        return name

    def natural_key(self):
        """Return the (slug, site_slug, service_slug) tuple identifying this statistic."""
        return (self.slug, self.site_slug, self.service_slug)

    def has_native_support_for_interval(self, time_interval):
        """Tell whether `time_interval` is an option of the 'time_interval' filter.

        Only the filter whose id is 'time_interval' is inspected; filters
        lacking an 'options' entry are tolerated.
        """
        # pylint: disable=not-an-iterable
        return any(
            time_interval == option['id']
            for filter_ in self.filters
            if filter_.get('id') == 'time_interval'
            for option in filter_.get('options') or []
        )


TIME_FILTERS = [
    ('previous-year', _('Previous year')),
    ('current-year', _('Current year')),
    ('next-year', _('Next year')),
    ('previous-month', _('Previous month')),
    ('current-month', _('Current month')),
    ('next-month', _('Next month')),
    ('previous-week', _('Previous week')),
    ('current-week', _('Current week')),
    ('next-week', _('Next week')),
    ('range', _('Free range (date)')),
    ('range-template', _('Free range (template)')),
]


@register_cell_class
class ChartNgCell(CellBase):
    """Cell rendering a Statistic as a pygal chart or table."""

    statistic = models.ForeignKey(
        verbose_name=_('Data'),
        to=Statistic,
        blank=False,
        null=True,
        on_delete=models.SET_NULL,
        related_name='cells',
        help_text=_(
            'This list may take a few seconds to be updated, please refresh the page if an item is missing.'
        ),
    )
    subfilters = JSONField(default=list)
    filter_params = JSONField(default=dict)
    title = models.CharField(_('Title'), max_length=150, blank=True)
    time_range = models.CharField(
        _('Shown period'),
        max_length=20,
        blank=True,
        choices=TIME_FILTERS,
    )
    time_range_start = models.DateField(_('From'), null=True, blank=True)
    time_range_end = models.DateField(_('To'), null=True, blank=True)
    time_range_start_template = models.CharField(
        _('From'),
        max_length=200,
        blank=True,
        validators=[django_template_validator],
        help_text=_(
            'Template code returning a date. For example, Monday in two weeks would be '
            'today|add_days:"14"|adjust_to_week_monday. Page variables are also accessible.'
        ),
    )
    time_range_end_template = models.CharField(
        _('To'),
        max_length=200,
        blank=True,
        validators=[django_template_validator],
    )
    chart_type = models.CharField(
        _('Chart Type'),
        max_length=20,
        default='bar',
        choices=(
            ('bar', _('Bar')),
            ('horizontal-bar', _('Horizontal Bar')),
            ('stacked-bar', _('Stacked Bar')),
            ('stacked-bar-percent', _('Stacked Bar (%)')),
            ('line', _('Line')),
            ('pie', _('Pie')),
            ('dot', _('Dot')),
            ('table', _('Table')),
            ('table-inverted', _('Table (inverted)')),
        ),
    )

    height = models.CharField(
        _('Height'),
        max_length=20,
        default='250',
        choices=(
            ('150', _('Short (150px)')),
            ('250', _('Average (250px)')),
            ('350', _('Tall (350px)')),
        ),
    )
    sort_order = models.CharField(
        _('Sort data'),
        max_length=5,
        default='none',
        help_text=_('This setting only applies for one-dimensional charts.'),
        choices=(
            ('none', _('None')),
            ('alpha', _('Alphabetically')),
            ('asc', _('Increasing values')),
            ('desc', _('Decreasing values')),
        ),
    )
    hide_null_values = models.BooleanField(
        default=False,
        verbose_name=_('Hide null values'),
        help_text=_('This setting only applies for one-dimensional charts.'),
    )

    manager_form_template = 'combo/chartngcell_form.html'

    invalid_reason_codes = {
        'missing_statistic_url': _('No statistic URL set'),
        'statistic_data_not_found': _('Statistic URL seems to unexist'),
        'statistic_url_invalid': _('Statistic URL seems to be invalid'),
    }

    class Meta:
        verbose_name = _('Chart')

    class Media:
        js = ('js/chartngcell.js',)

    @classmethod
    def is_enabled(cls):
        """Return a truthy value when a 'bijoe' service or statistics providers are configured."""
        # KNOWN_SERVICES may be undefined; ChartCell.is_enabled guards against that too
        known_services = getattr(settings, 'KNOWN_SERVICES', None) or {}
        return known_services.get('bijoe') or settings.STATISTICS_PROVIDERS

    def get_default_form_class(self):
        """Return the form class used to edit this cell."""
        from .forms import ChartNgForm

        return ChartNgForm

    def get_additional_label(self):
        """Return the cell title as its additional label."""
        return self.title

    def is_relevant(self, context):
        """Return True only when a statistic is attached to the cell."""
        return bool(self.statistic)

    def check_validity(self):
        """Mark the cell valid or invalid depending on its statistic URL.

        Nothing is checked without a statistic. A statistic without URL marks
        the cell invalid right away; otherwise the data is fetched and the
        response (or None if fetching failed) is handed to
        set_validity_from_url().
        """
        if not self.statistic:
            return
        if not self.statistic.url:
            self.mark_as_invalid('missing_statistic_url')
            return

        resp = None
        try:
            resp = self.get_statistic_data()
        except (RequestException, MissingRequest, MissingVariable):
            # best effort: a failed fetch is reported through resp being None
            pass

        self.set_validity_from_url(
            resp, not_found_code='statistic_data_not_found', invalid_code='statistic_url_invalid'
        )

    def get_statistic_data(self, filter_params=None, raise_if_not_cached=False, invalidate_cache=False):
        """GET the statistic URL and return the response.

        `filter_params` defaults to get_filter_params() when empty or None;
        `raise_if_not_cached` and `invalidate_cache` are passed through to the
        requests wrapper.
        """
        return requests.get(
            self.statistic.url,
            params=filter_params or self.get_filter_params(),
            cache_duration=300,
            remote_service='auto',
            without_user=True,
            raise_if_not_cached=raise_if_not_cached,
            log_errors=False,
            invalidate_cache=invalidate_cache,
        )

    def get_chart(self, width=None, height=None, raise_if_not_cached=False):
        """Fetch the statistic data and return a configured pygal chart.

        A refresh of the statistic data is scheduled through the spooler on
        transaction commit. HTTP errors are raised by raise_for_status().
        """
        filter_params = self.get_filter_params()
        transaction.on_commit(
            lambda: spooler.refresh_statistics_data(cell_pk=self.pk, filter_params=filter_params)
        )
        response = self.get_statistic_data(filter_params, raise_if_not_cached)
        response.raise_for_status()
        response = response.json()

        style = pygal.style.DefaultStyle(font_family='"Open Sans", sans-serif', background='transparent')
        # tables are built from a Bar chart object
        chart = {
            'bar': pygal.Bar,
            'horizontal-bar': pygal.HorizontalBar,
            'stacked-bar': pygal.StackedBar,
            'stacked-bar-percent': pygal.StackedBar,
            'line': pygal.Line,
            'pie': pygal.Pie,
            'dot': pygal.Dot,
            'table': pygal.Bar,
            'table-inverted': pygal.Bar,
        }[self.chart_type](config=pygal.Config(style=copy.copy(style), order_min=0.1, max_scale=5))

        if self.statistic.service_slug == 'bijoe':
            x_labels, y_labels, data = self.parse_response(response, chart)
            chart.x_labels = x_labels
            if chart.axis_count == 1:
                data = self.process_one_dimensional_data(chart, data)
            self.add_data_to_chart(chart, data, y_labels)
        else:
            data = response['data']

            interval = self.filter_params.get('time_interval', '')
            if data['x_labels'] and interval:
                if interval == 'day' or not self.statistic.has_native_support_for_interval(interval):
                    # daily data (see get_filter_params) is aggregated locally
                    self.aggregate_data(data, interval)
                elif interval == 'month':
                    data['x_labels'] = [
                        format_date(datetime.strptime(x, '%Y-%m'), 'M Y') for x in data['x_labels']
                    ]

            chart.x_labels = data['x_labels']
            chart.axis_count = min(len(data['series']), 2)

            if self.statistic.data_type:
                chart.config.value_formatter = self.get_value_formatter(self.statistic.data_type)

            chart.compute_sum = False

            if chart.axis_count == 1:
                data['series'][0]['data'] = self.process_one_dimensional_data(
                    chart, data['series'][0]['data']
                )
                if self.chart_type == 'pie':
                    # one serie per non-null value
                    data["series"] = [
                        {"label": label, "data": [value]}
                        for label, value in zip(chart.x_labels, data["series"][0]["data"])
                        if value
                    ]
            if self.chart_type == 'stacked-bar-percent':
                self.make_percent([serie['data'] for serie in data['series']])

            for serie in data['series']:
                chart.add(serie['label'], serie['data'])

        self.configure_chart(chart, width, height)
        return chart

    def get_filter_params(self):
        """Return the query parameters to send to the statistic URL.

        Falsy filter values are dropped and the others go through
        evaluate_filter_value(). 'start' and 'end' are derived from
        `time_range`; for the predefined periods 'end' is the first day
        following the period. When the statistic has no native support for
        the requested time interval, 'day' is requested instead.
        """
        params = {k: self.evaluate_filter_value(v) for k, v in self.filter_params.items() if v}

        now = timezone.now().date()
        if self.time_range == 'current-year':
            params['start'] = date(year=now.year, month=1, day=1)
            params['end'] = date(year=now.year + 1, month=1, day=1)
        elif self.time_range == 'previous-year':
            params['start'] = date(year=now.year - 1, month=1, day=1)
            params['end'] = date(year=now.year, month=1, day=1)
        elif self.time_range == 'next-year':
            params['start'] = date(year=now.year + 1, month=1, day=1)
            params['end'] = date(year=now.year + 2, month=1, day=1)
        elif self.time_range == 'current-month':
            params['start'] = now.replace(day=1)
            params['end'] = now + relativedelta(day=1, months=1)
        elif self.time_range == 'previous-month':
            params['start'] = now + relativedelta(day=1, months=-1)
            params['end'] = now.replace(day=1)
        elif self.time_range == 'next-month':
            params['start'] = now + relativedelta(day=1, months=1)
            params['end'] = now + relativedelta(day=1, months=2)
        elif self.time_range == 'current-week':
            params['start'] = now + relativedelta(weekday=MO(-1))
            # days=+1 is there so that a Monday "now" maps to the following Monday
            params['end'] = now + relativedelta(weekday=MO(+1), days=+1)
        elif self.time_range == 'previous-week':
            params['start'] = now + relativedelta(weekday=MO(-2))
            params['end'] = now + relativedelta(weekday=MO(-1))
        elif self.time_range == 'next-week':
            params['start'] = now + relativedelta(weekday=MO(+1), days=+1)
            params['end'] = now + relativedelta(weekday=MO(+2), days=+1)
        elif self.time_range == 'range':
            if self.time_range_start:
                params['start'] = self.time_range_start
            if self.time_range_end:
                params['end'] = self.time_range_end
        elif self.time_range == 'range-template':
            if self.time_range_start_template:
                start = self.evaluate_range_template(self.time_range_start_template)
                if start:
                    params['start'] = start
            if self.time_range_end_template:
                end = self.evaluate_range_template(self.time_range_end_template)
                if end:
                    params['end'] = end

        if 'time_interval' in params and not self.statistic.has_native_support_for_interval(
            params['time_interval']
        ):
            params['time_interval'] = 'day'

        return params

    def evaluate_range_template(self, value):
        """Render a date template expression as a 'Y-m-d' string.

        `value` may be the name of a page extra variable, in which case the
        variable content (stripped of braces and spaces) is used as the
        expression. Returns None when the expression cannot be rendered.
        """
        if value in self.page.extra_variables:
            value = self.page.extra_variables[value].strip('{ }')

        context = self.request_context
        # NOTE(review): request_context is cached on the instance and update()
        # pushes a new layer on each call, so layers accumulate - confirm this is fine.
        context.update({'now': datetime.now, 'today': datetime.now})
        try:
            return Template('{{ %s|date:"Y-m-d" }}' % value).render(context)
        except (VariableDoesNotExist, TemplateSyntaxError):
            return None

    def evaluate_filter_value(self, value):
        """Resolve a `variable:<name>` filter value against the page variables.

        Any other value (lists, plain strings, non-string values) is returned
        untouched. Raises MissingVariable when the page has no such variable,
        and MissingRequest (through request_context) without a request.
        """
        prefix = 'variable:'
        if not isinstance(value, str) or not value.startswith(prefix):
            return value

        # strip the prefix only, not later occurrences of it
        variable_name = value[len(prefix) :]
        try:
            variable = self.page.extra_variables[variable_name]
        except KeyError:
            raise MissingVariable(variable_name)

        return Template(variable).render(self.request_context)

    @cached_property
    def request_context(self):
        """Return a RequestContext built from `self._request`.

        Raises MissingRequest when `_request` is unset or falsy.
        """
        if not getattr(self, '_request', None):
            raise MissingRequest

        ctx = RequestContext(self._request, getattr(self._request, 'extra_context', {}))
        ctx['request'] = self._request
        return ctx

    def parse_response(self, response, chart):
        """Extract (x_labels, y_labels, data) from a 'bijoe' response.

        Sets `axis_count` and `compute_sum` on the chart, as well as its value
        formatter when one applies. Raises UnsupportedDataSet for three
        dimensions or when data does not match the labels.
        """
        # normalize axis to have a fake axis when there are no dimensions and
        # always a x axis when there is a single dimension.
        data = response['data']
        loop_labels = response['axis'].get('loop') or []
        x_labels = response['axis'].get('x_labels') or []
        y_labels = response['axis'].get('y_labels') or []
        if loop_labels:
            if 'x_labels' in response['axis'] and 'y_labels' in response['axis']:
                # no support for three dimensions
                raise UnsupportedDataSet()
            if not y_labels:
                y_labels = loop_labels
            else:
                x_labels, y_labels = y_labels, loop_labels
        if (
            x_labels
            and y_labels
            and (len(y_labels) != len(data) or not all(len(x) == len(x_labels) for x in data))
        ):
            # varying dimensions
            raise UnsupportedDataSet()
        if not x_labels and not y_labels:  # unidata
            x_labels = ['']
            y_labels = ['']
            data = [data]
            chart.axis_count = 0
        elif not x_labels:
            x_labels = y_labels
            y_labels = ['']
            chart.axis_count = 1
        elif not y_labels:
            y_labels = ['']
            chart.axis_count = 1
        else:
            chart.axis_count = 2

        chart.compute_sum = bool(response.get('measure') == 'integer' and chart.axis_count > 0)

        formatter = self.get_value_formatter(response.get('unit'), response.get('measure'))
        if formatter is not None:
            chart.config.value_formatter = formatter

        return x_labels, y_labels, data

    def configure_dot_chart(self, chart, width, height):
        """Chart type hook: hide the legend and draw every dot in the same colour."""
        chart.show_legend = False
        # use a single colour for dots
        chart.config.style.colors = ('#1f77b4',) * max(len(chart.x_labels), 1)

    def configure_horizontal_bar_chart(self, chart, width, height):
        """Chart type hook: shorten labels when the chart is narrow."""
        if width and width < 500:
            # truncate labels
            chart.x_labels = [pygal.util.truncate(x, 15) for x in chart.x_labels]

    def configure_pie_chart(self, chart, width, height):
        """Chart type hook: always show the legend, truncated according to available room."""
        chart.show_legend = True
        if width and height:
            # pies are as tall as wide, reserve the appropriate space and distribute
            # the rest for the legend.
            chart.truncate_legend = (width - height) // 10
        elif width:
            chart.truncate_legend = width // 20

    def configure_chart(self, chart, width, height):
        """Apply size, legend and colour settings, then the chart type specific hook.

        The hook is the `configure_<chart type>_chart` method, if it exists.
        `height` defaults to the cell's configured height.
        """
        auto_height_scale = pygal.style.DefaultStyle.legend_font_size * 1.75
        chart.config.margin = 0
        if width:
            chart.config.width = width
        height = height or int(self.height)
        # adapt chart's height to legend length
        chart.config.height = max(height, auto_height_scale * len(chart.raw_series))
        if width or height:
            chart.config.explicit_size = True
        chart.config.js = [os.path.join(settings.STATIC_URL, 'js/pygal-tooltips.js')]

        chart.show_legend = bool(chart.axis_count > 1)
        chart.truncate_legend = 30
        # matplotlib tab10 palette
        chart.config.style.colors = (
            '#1f77b4',
            '#ff7f0e',
            '#2ca02c',
            '#d62728',
            '#9467bd',
            '#8c564b',
            '#e377c2',
            '#7f7f7f',
            '#bcbd22',
            '#17becf',
        )

        custom_configure_method_name = 'configure_%s_chart' % self.chart_type.replace('-', '_')
        if hasattr(self, custom_configure_method_name):
            getattr(self, custom_configure_method_name)(chart, width, height)

        if self.chart_type != 'pie':
            if width and width < 500:
                chart.legend_at_bottom = True
                # restore demanded chart's height
                chart.config.height = height

    def process_one_dimensional_data(self, chart, data):
        """Apply null hiding, sorting and the table total to a single serie."""
        if self.hide_null_values:
            data = self.hide_values(chart, data)
        if data and self.sort_order != 'none':
            data = self.sort_values(chart, data)
        if getattr(chart, 'compute_sum', True) and self.chart_type in ('table', 'table-inverted'):
            data = self.add_total_to_line_table(chart, data)
        return data

    @staticmethod
    def hide_values(chart, data):
        """Return `data` without its falsy values, dropping the matching chart labels."""
        x_labels, new_data = [], []
        for label, value in zip(chart.x_labels, data):
            if value:
                x_labels.append(label)
                new_data.append(value)
        chart.x_labels = x_labels
        return new_data

    def sort_values(self, chart, data):
        """Return `data` sorted according to `sort_order`, reordering chart labels alike.

        'alpha' sorts on labels, comparing digit runs as numbers; 'asc' and
        'desc' sort on values, None counting as 0. Data is returned unchanged
        for any other sort order, or when there is nothing to sort.
        """
        pairs = zip(chart.x_labels, data)
        if self.sort_order == 'alpha':
            digit_re = re.compile('([0-9]+)')

            def natural_sort_key(item):
                return [int(text) if text.isdigit() else text.lower() for text in digit_re.split(item[0])]

            tmp_items = sorted(pairs, key=natural_sort_key)
        elif self.sort_order == 'asc':
            tmp_items = sorted(pairs, key=lambda x: (x[1] or 0))
        elif self.sort_order == 'desc':
            tmp_items = sorted(pairs, key=lambda x: (x[1] or 0), reverse=True)
        else:
            return data

        if not tmp_items:
            # zip(*[]) cannot be unpacked into two names
            return data

        x_labels, sorted_data = zip(*tmp_items)
        chart.x_labels = list(x_labels)
        return list(sorted_data)

    @staticmethod
    def add_total_to_line_table(chart, data):
        """Append the sum of non-None values to `data`, with a 'Total' label.

        `data` and `chart.x_labels` are modified in place; nothing is added
        when there is at most one value.
        """
        # workaround pygal
        chart.compute_sum = False

        # do not add total for single point
        if len(data) <= 1:
            return data

        data.append(sum(x for x in data if x is not None))
        chart.x_labels.append(gettext('Total'))
        return data

    def add_data_to_chart(self, chart, data, y_labels):
        """Add series to the chart: one per y label, or one per value for pies."""
        if self.chart_type != 'pie':
            series_data = []
            for i, serie_label in enumerate(y_labels):
                if chart.axis_count < 2:
                    values = data
                else:
                    values = [data[i][j] for j in range(len(chart.x_labels))]
                series_data.append(values)
                chart.add(serie_label, values)
            if self.chart_type == 'stacked-bar-percent':
                # lists given to chart.add() are modified in place
                self.make_percent(series_data)
        else:
            # pie, create a serie by data, to get different colours
            values = data
            for label, value in zip(chart.x_labels, values):
                if not value:
                    continue
                chart.add(label, value)

    @staticmethod
    def get_value_formatter(unit, measure='duration'):
        """Return a function formatting values for display, or None.

        Durations (unit 'seconds' or measure 'duration') are rendered with
        Django's timesince(), percents with one decimal. Both formatters
        render None as '-'.
        """
        # NOTE(review): get_chart() passes `unit` only, in which case the default
        # measure always selects the duration formatter - confirm this is intended.
        if unit == 'seconds' or measure == 'duration':

            def format_duration(value):
                if value is None:
                    return '-'
                # arbitrary reference date, only the difference matters
                base_date = datetime(1871, 3, 18)
                return timesince(base_date, base_date + timedelta(seconds=value))

            return format_duration
        elif measure == 'percent':

            def format_percent(value):
                if value is None:
                    return '-'
                return f'{value:.1f}%'

            return format_percent
        return None

    def make_percent(self, series_data):
        """Convert values, in place, to their share of each column total.

        `series_data` is a list of series (lists of values); for each position
        the non-None values are scaled so that they add up to 100, rounded to
        one decimal. Columns whose total is 0 are left untouched.
        """
        for i, column in enumerate(zip(*series_data)):
            sum_values = sum(v for v in column if v is not None)
            if sum_values == 0:
                continue

            factor = 100 / sum_values
            for serie_values in series_data:
                if serie_values[i] is not None:
                    serie_values[i] = round(serie_values[i] * factor, 1)

    @staticmethod
    def aggregate_data(data, interval):
        """Aggregate daily series by `interval`, in place.

        `data['x_labels']` must hold '%Y-%m-%d' dates; it is replaced by the
        labels of the aggregated periods and each serie's data by the sums
        over these periods (None values counting as 0).
        """
        series_data = [serie['data'] for serie in data['series']]
        dates = [datetime.strptime(label, '%Y-%m-%d') for label in data['x_labels']]
        min_date, max_date = min(dates), max(dates)

        date_formats = {
            'day': 'd M Y',
            # Translators: This indicates week number followed by year, for example it can yield W2-2021.
            # First "W" is the first letter of the word "week" and should be translated accordingly, second
            # "W" and "o" are interpreted by Django's date filter and should be left as is. First W is
            # backslash escaped to prevent it from being interpreted, translators should refer to Django's
            # documentation in order to know if the new letter resulting of translation should be escaped or not.
            'week': gettext(r'\WW-o'),
            'month': 'M Y',
            'year': 'Y',
            'weekday': 'l',
        }
        if interval == 'day':
            x_labels = [
                format_date(min_date + timedelta(days=i), date_formats['day'])
                for i in range((max_date - min_date).days + 1)
            ]
        elif interval == 'month':
            month_difference = max_date.month - min_date.month + (max_date.year - min_date.year) * 12
            x_labels = [
                format_date(min_date + relativedelta(months=i), date_formats['month'])
                for i in range(month_difference + 1)
            ]
        elif interval == 'year':
            x_labels = [str(year) for year in range(min_date.year, max_date.year + 1)]
        elif interval == 'weekday':
            x_labels = [str(label) for label in WEEKDAYS.values()]
        elif interval == 'week':
            x_labels = []
            current_date, last_date = min_date, max_date
            if min_date.weekday() > max_date.weekday():
                # stepping by whole weeks from min_date would stop before max_date's week
                last_date += relativedelta(weeks=1)
            while current_date <= last_date:
                x_labels.append(format_date(current_date, date_formats['week']))
                current_date += relativedelta(weeks=1)

        aggregates = OrderedDict((label, [0] * len(series_data)) for label in x_labels)
        for i, day in enumerate(dates):
            key = format_date(day, date_formats[interval])
            for j, serie_values in enumerate(series_data):
                aggregates[key][j] += serie_values[i] or 0

        data['x_labels'] = x_labels
        for i, serie in enumerate(data['series']):
            serie['data'] = [values[i] for values in aggregates.values()]

    @property
    def available_filters(self):
        """Filters of the statistic followed by the cell's subfilters."""
        return self.statistic.filters + self.subfilters

    def update_subfilters(self, filter_params=None):
        """Refresh `subfilters` from the statistic data and save the cell if they changed.

        Filter parameters which no longer match an available filter are
        dropped. Nothing happens when data cannot be fetched or decoded.
        """
        self._request = get_request()
        try:
            response = self.get_statistic_data(filter_params=filter_params)
        except (TemplateSyntaxError, VariableDoesNotExist):
            return

        try:
            response.raise_for_status()
            data = response.json()['data']
        except Exception:
            # best effort: keep current subfilters whatever went wrong
            return

        new_subfilters = data.get('subfilters', [])
        if self.subfilters != new_subfilters:
            self.subfilters = new_subfilters
            subfilter_ids = {filter_['id'] for filter_ in self.available_filters}
            self.filter_params = {k: v for k, v in self.filter_params.items() if k in subfilter_ids}
            self.save()

    def get_cache_key(self, filters_cell_id):
        """Return the 'dataviz:<filters cell id>:<cell pk>' cache key."""
        return 'dataviz:%s:%s' % (filters_cell_id, self.pk)


@register_cell_class
class ChartFiltersCell(CellBase):
    """Cell displaying a filters form; at most one is allowed per page."""

    filters = JSONField(_('Filters'), default=dict)

    title = _('Filters')
    default_template_name = 'combo/chart-filters.html'
    manager_form_template = 'combo/chartfilterscell_form.html'
    max_one_by_page = True

    class Meta:
        verbose_name = _('Filters')

    class Media:
        js = ('js/combo.multiselectwidget.js',)
        css = {'all': ('css/combo.multiselectwidget.css',)}

    @classmethod
    def is_enabled(cls):
        """Return a truthy value when the cell is enabled and statistics providers are configured."""
        return settings.CHART_FILTERS_CELL_ENABLED and settings.STATISTICS_PROVIDERS

    def get_cell_extra_context(self, context):
        """Add the filters form to the context, bound to GET data on submit."""
        from .forms import ChartFiltersForm

        ctx = super().get_cell_extra_context(context)

        if 'filters_cell_id' in context['request'].GET:  # detect refresh on submit
            ctx['form'] = ChartFiltersForm(
                data=context['request'].GET,
                page=self.page,
                filters_cell=self,
                filters_cell_id=context['request'].GET['filters_cell_id'],
            )
        else:
            ctx['form'] = ChartFiltersForm(page=self.page, filters_cell=self)

        return ctx

    def get_default_form_class(self):
        """Return the form class used to configure this cell."""
        from .forms import ChartFiltersConfigForm

        return ChartFiltersConfigForm