dataviz: aggregate received data by time intervals (#53180)

This commit is contained in:
Valentin Deniaud 2021-04-19 17:25:45 +02:00
parent 3bce1589f1
commit 9ca7e0c17e
3 changed files with 162 additions and 5 deletions

View File

@ -21,6 +21,7 @@ from django import forms
from django.conf import settings
from django.db import transaction
from django.db.models import Q
from django.utils.translation import ugettext_lazy as _
from combo.utils import cache_during_request, requests, spooler
@ -54,6 +55,11 @@ def trigger_statistics_list_refresh():
class ChartNgForm(forms.ModelForm):
blank_choice = ('', '---------')
time_intervals = (
('_month', _('Month')),
('_year', _('Year')),
('_weekday', _('Week day')),
)
class Meta:
model = ChartNgCell
@ -109,10 +115,16 @@ class ChartNgForm(forms.ModelForm):
self.fields = OrderedDict((field_id, self.fields[field_id]) for field_id in field_ids)
if 'time_interval' in self.fields:
self.extend_time_interval_choices()
def save(self, *args, **kwargs):
if 'statistic' in self.changed_data:
self.instance.filter_params.clear()
self.instance.time_range = ''
for filter_ in self.instance.statistic.filters:
if 'default' in filter_:
self.instance.filter_params[filter_['id']] = filter_['default']
else:
for filter_ in self.instance.statistic.filters:
field = filter_['id']
@ -122,3 +134,11 @@ class ChartNgForm(forms.ModelForm):
else:
self.instance.filter_params.pop(field, None)
return super().save(*args, **kwargs)
def extend_time_interval_choices(self):
    """Append the extra entries of ``self.time_intervals`` to the ``time_interval`` field.

    Nothing is added unless ``'day'`` is one of the field's current choices.
    An extra entry is skipped when its id, once the leading underscore marker
    is removed, is already among the field's choices (e.g. ``'_month'`` is not
    added when ``'month'`` is already offered).
    """
    field = self.fields['time_interval']
    known_ids = {choice_id for choice_id, label in field.choices}
    if 'day' not in known_ids:
        return

    for choice in self.time_intervals:
        choice_id = choice[0]
        # Remove only the leading marker; str.strip('_') would also eat
        # trailing underscores and could wrongly match an unrelated id.
        base_id = choice_id[1:] if choice_id.startswith('_') else choice_id
        if base_id not in known_ids:
            # Appended in place, as before, so every holder of this list sees the new entries.
            field.choices.append(choice)

View File

@ -17,14 +17,18 @@
import copy
import os
import sys
from datetime import date
from collections import OrderedDict
from datetime import date, datetime, timedelta
import pygal
import pygal.util
from dateutil.relativedelta import relativedelta
from django.conf import settings
from django.db import models, transaction
from django.template.defaultfilters import date as format_date
from django.urls import reverse
from django.utils import timezone
from django.utils.dates import WEEKDAYS
from django.utils.encoding import force_text
from django.utils.translation import gettext
from django.utils.translation import ugettext_lazy as _
@ -324,6 +328,11 @@ class ChartNgCell(CellBase):
self.add_data_to_chart(chart, data, y_labels)
else:
data = response['data']
interval = self.filter_params.get('time_interval', '')
if interval == 'day' or interval.startswith('_'):
self.aggregate_data(data, interval)
chart.x_labels = data['x_labels']
chart.axis_count = min(len(data['series']), 2)
self.prepare_chart(chart, width, height)
@ -365,6 +374,8 @@ class ChartNgCell(CellBase):
params['start'] = self.time_range_start
if self.time_range_end:
params['end'] = self.time_range_end
if 'time_interval' in params and params['time_interval'].startswith('_'):
params['time_interval'] = 'day'
return params
def parse_response(self, response, chart):
@ -549,3 +560,35 @@ class ChartNgCell(CellBase):
for values in series_data:
if values[i] is not None:
values[i] = round(values[i] * factor, 1)
@staticmethod
def aggregate_data(data, interval):
    """Aggregate day-based series into buckets, modifying ``data`` in place.

    ``data['x_labels']`` must hold dates formatted as ``YYYY-MM-DD`` and every
    item of ``data['series']`` must have a ``'data'`` list aligned with those
    labels. Values are summed per bucket; ``None`` values count as 0.

    Supported intervals:

    * ``'day'``: one bucket per day between the first and last date
      (labels ``DD-MM-YYYY``), filling the gaps with 0;
    * ``'_month'``: one bucket per month between the first and last date
      (labels ``MM-YYYY``);
    * ``'_year'``: one bucket per year between the first and last date;
    * ``'_weekday'``: seven buckets named after ``WEEKDAYS``.

    Raises ``ValueError`` if ``interval`` is not one of the above or if a
    label is not a valid ``YYYY-MM-DD`` date. When there are no labels at
    all, ``data`` is left untouched.
    """
    if interval not in ('day', '_month', '_year', '_weekday'):
        raise ValueError('unsupported aggregation interval: %r' % (interval,))

    if not data['x_labels']:
        # min()/max() below would fail on an empty sequence; nothing to aggregate.
        return

    series_data = [serie['data'] for serie in data['series']]
    moments = [datetime.strptime(label, '%Y-%m-%d') for label in data['x_labels']]
    first, last = min(moments), max(moments)

    # Each branch defines both the ordered bucket labels and the function mapping
    # a date to its bucket, so labels and lookup keys cannot disagree.
    if interval == 'day':

        def bucket_of(moment):
            return moment.strftime('%d-%m-%Y')

        x_labels = [bucket_of(first + timedelta(days=offset)) for offset in range((last - first).days + 1)]
    elif interval == '_month':

        def bucket_of(moment):
            return moment.strftime('%m-%Y')

        # Months are numbered continuously (year * 12 + zero-based month) to walk the range.
        first_index = first.year * 12 + first.month - 1
        last_index = last.year * 12 + last.month - 1
        x_labels = [
            bucket_of(datetime(index // 12, index % 12 + 1, 1))
            for index in range(first_index, last_index + 1)
        ]
    elif interval == '_year':

        def bucket_of(moment):
            return str(moment.year)

        x_labels = [str(year) for year in range(first.year, last.year + 1)]
    else:
        # WEEKDAYS is keyed by datetime.weekday() numbers (0 = Monday); str() forces lazy labels.
        weekday_names = [str(WEEKDAYS[number]) for number in range(7)]

        def bucket_of(moment):
            return weekday_names[moment.weekday()]

        x_labels = weekday_names

    totals = OrderedDict((label, [0] * len(series_data)) for label in x_labels)
    for position, moment in enumerate(moments):
        bucket = totals[bucket_of(moment)]
        for index, values in enumerate(series_data):
            bucket[index] += values[position] or 0

    data['x_labels'] = x_labels
    for index, serie in enumerate(data['series']):
        serie['data'] = [bucket[index] for bucket in totals.values()]

View File

@ -318,6 +318,22 @@ STATISTICS_LIST = {
'name': '404 not found stat',
'id': 'not-found',
},
{
'url': 'https://authentic.example.com/api/statistics/daily/',
'name': 'Daily discontinuous serie',
'id': 'daily',
"filters": [
{
"default": "day",
"id": "time_interval",
"label": "Time interval",
"options": [
{"id": "day", "label": "Day"},
],
"required": True,
}
],
},
]
}
@ -352,6 +368,17 @@ def new_api_mock(url, request):
return {'content': json.dumps(response), 'request': request, 'status_code': 200}
if url.path == '/api/statistics/not-found/':
return {'content': b'', 'request': request, 'status_code': 404}
if url.path == '/api/statistics/daily/':
response = {
'data': {
'series': [
{'data': [None, 1, 16, 2], 'label': 'Serie 1'},
{'data': [2, 2, 1, None], 'label': 'Serie 2'},
],
'x_labels': ['2020-10-06', '2020-10-13', '2020-11-30', '2022-02-01'],
},
}
return {'content': json.dumps(response), 'request': request, 'status_code': 200}
@pytest.fixture
@ -1048,7 +1075,7 @@ def test_chartng_cell_manager_new_api(app, admin_user, new_api_statistics):
statistics_field = resp.form[field_prefix + 'statistic']
assert len(statistics_field.options) == len(STATISTICS_LIST['data']) + 1
assert statistics_field.value == str(cell.statistic.pk)
assert statistics_field.options[3][2] == 'Connection: One serie stat'
assert statistics_field.options[4][2] == 'Connection: One serie stat'
time_interval_field = resp.form[field_prefix + 'time_interval']
assert time_interval_field.pos == statistics_field.pos + 1
@ -1057,6 +1084,7 @@ def test_chartng_cell_manager_new_api(app, admin_user, new_api_statistics):
('day', False, 'Day'),
('month', True, 'Month'),
('year', False, 'Year'),
('_weekday', False, 'Week day'),
]
ou_field = resp.form[field_prefix + 'ou']
@ -1293,11 +1321,11 @@ def test_chartng_cell_new_api_filter_params(new_api_statistics, nocache, freezer
assert 'time_interval=' not in request.url
assert 'ou=' not in request.url
cell.filter_params = {'time_interval': 'day', 'ou': 'default'}
cell.filter_params = {'time_interval': 'month', 'ou': 'default'}
cell.save()
chart = cell.get_chart()
request = new_api_mock.call['requests'][1]
assert 'time_interval=day' in request.url
assert 'time_interval=month' in request.url
assert 'ou=default' in request.url
freezer.move_to('2020-03-02 12:01')
@ -1305,7 +1333,7 @@ def test_chartng_cell_new_api_filter_params(new_api_statistics, nocache, freezer
cell.save()
chart = cell.get_chart()
request = new_api_mock.call['requests'][2]
assert 'time_interval=day' in request.url
assert 'time_interval=month' in request.url
assert 'ou=default' in request.url
assert 'start=2019-01-01' in request.url and 'end=2020-01-01' in request.url
@ -1350,3 +1378,69 @@ def test_dataviz_check_validity(nocache):
cell.check_validity()
validity_info = ValidityInfo.objects.latest('pk')
assert validity_info.invalid_reason_code == 'statistic_data_not_found'
@with_httmock(new_api_mock)
def test_chartng_cell_new_api_aggregation(new_api_statistics, app, admin_user, nocache):
    """Check the extra time interval choices and the aggregation done for each of them."""
    page = Page.objects.create(title='One', slug='index')
    cell = ChartNgCell(page=page, order=1, placeholder='content')
    cell.statistic = Statistic.objects.get(slug='daily')
    cell.save()

    app = login(app)
    resp = app.get('/manage/pages/%s/' % page.id)
    time_interval_field = resp.form['cdataviz_chartngcell-%s-time_interval' % cell.id]

    def submit_and_render(interval=None):
        # Optionally pick an interval, save the form, then build the chart from the stored cell.
        if interval is not None:
            time_interval_field.value = interval
        resp.form.submit()
        cell.refresh_from_db()
        return cell.get_chart()

    def requested_url(index):
        return new_api_mock.call['requests'][index].url

    # Only "day" comes from the statistic; underscore-prefixed entries are the added ones.
    assert time_interval_field.value == 'day'
    expected_options = [
        ('day', True, 'Day'),
        ('_month', False, 'Month'),
        ('_year', False, 'Year'),
        ('_weekday', False, 'Week day'),
    ]
    assert time_interval_field.options == expected_options

    # Day: every day between the first and last date gets a label.
    chart = submit_and_render()
    assert len(chart.x_labels) == 484
    assert chart.x_labels[:3] == ['06-10-2020', '07-10-2020', '08-10-2020']
    assert chart.x_labels[-3:] == ['30-01-2022', '31-01-2022', '01-02-2022']
    first_serie, second_serie = chart.raw_series[0][0], chart.raw_series[1][0]
    assert first_serie[:8] == [0, 0, 0, 0, 0, 0, 0, 1]
    assert second_serie[:8] == [2, 0, 0, 0, 0, 0, 0, 2]

    # Month: the request still asks for daily data.
    chart = submit_and_render('_month')
    assert 'time_interval=day' in requested_url(1)
    assert len(chart.x_labels) == 17
    assert chart.x_labels[:3] == ['10-2020', '11-2020', '12-2020']
    assert chart.x_labels[-3:] == ['12-2021', '01-2022', '02-2022']
    assert chart.raw_series == [
        ([1, 16] + [0] * 14 + [2], {'title': 'Serie 1'}),
        ([4, 1] + [0] * 15, {'title': 'Serie 2'}),
    ]

    # Year.
    chart = submit_and_render('_year')
    assert 'time_interval=day' in requested_url(2)
    assert chart.x_labels == ['2020', '2021', '2022']
    assert chart.raw_series == [
        ([17, 0, 2], {'title': 'Serie 1'}),
        ([5, 0, 0], {'title': 'Serie 2'}),
    ]

    # Week day.
    chart = submit_and_render('_weekday')
    assert 'time_interval=day' in requested_url(3)
    week = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    assert chart.x_labels == week
    assert chart.raw_series == [
        ([16, 3, 0, 0, 0, 0, 0], {'title': 'Serie 1'}),
        ([1, 4, 0, 0, 0, 0, 0], {'title': 'Serie 2'}),
    ]