# Source: bijoe/bijoe/visualization/utils.py (listed as 391 lines, 14 KiB, Python)

# bijoe - BI dashboard
# Copyright (C) 2015 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import collections
import copy
import datetime
import decimal
import hashlib
import json
import re
from django.conf import settings
from django.core.cache import cache
from django.http import Http404
from django.utils.encoding import force_bytes, force_text
from django.utils.safestring import mark_safe
from django.utils.translation import ugettext_lazy as _
from ..engine import Engine, MeasureCell, Member
from ..utils import get_warehouses
from .ods import Workbook
class Visualization:
    """One measure of a cube, laid out along optional drilldown dimensions.

    A visualization combines a cube, a representation name, a measure, up to
    two drilldown dimensions (``drilldown_x`` / ``drilldown_y``), a mapping of
    filters keyed by dimension name and an optional ``loop`` dimension.
    Iterating over a visualization yields one sub-visualization per member of
    the loop dimension (or the visualization itself when there is no loop).
    """

    def __init__(
        self, cube, representation, measure, drilldown_x=None, drilldown_y=None, filters=None, loop=None
    ):
        self.cube = cube
        self.representation = representation
        self.measure = measure
        self.drilldown_x = drilldown_x
        self.drilldown_y = drilldown_y
        self.filters = filters or {}
        self.loop = loop
        # Filled by data(): maps each drilldown dimension to its list of members.
        self.members = {}

    @property
    def drilldown(self):
        """Return the set dimensions in the order loop, x, y."""
        return [dim for dim in (self.loop, self.drilldown_x, self.drilldown_y) if dim]

    def to_json(self):
        """Return a JSON-serializable dict describing this visualization.

        Dimensions and the measure are referenced by name; unset ones are
        emitted as falsy values (typically ``None``).
        """
        warehouse = self.cube.engine.warehouse
        return {
            'warehouse': warehouse.name,
            'warehouse_slug': warehouse.slug,
            'cube': self.cube.name,
            'representation': self.representation,
            'measure': self.measure and self.measure.name,
            'drilldown_x': self.drilldown_x and self.drilldown_x.name,
            'drilldown_y': self.drilldown_y and self.drilldown_y.name,
            'filters': self.filters,
            'loop': self.loop and self.loop.name,
        }

    def copy(self):
        """Return a new Visualization sharing everything but a deep copy of the filters."""
        return Visualization(
            self.cube,
            self.representation,
            measure=self.measure,
            drilldown_x=self.drilldown_x,
            drilldown_y=self.drilldown_y,
            filters=copy.deepcopy(self.filters),
            loop=self.loop,
        )

    @staticmethod
    def get_cube(d, warehouses=None):
        """Resolve the cube referenced by the dict ``d``.

        The warehouse is matched on ``warehouse_slug`` when both the payload
        and the warehouse have a slug, otherwise on the legacy ``warehouse``
        name. ``warehouses`` defaults to the result of ``get_warehouses()``.

        Raises Http404 when the warehouse or the cube cannot be found.
        """
        if not warehouses:
            warehouses = get_warehouses()
        wanted_slug = d.get('warehouse_slug')
        # .get(): a slug-only payload must end in Http404, not in a bare KeyError.
        wanted_name = d.get('warehouse')
        for warehouse in warehouses:
            if warehouse.slug and wanted_slug:
                if wanted_slug == warehouse.slug:
                    break
            elif wanted_name is not None and wanted_name == warehouse.name:  # legacy
                break
        else:
            raise Http404(
                'warehouse %s not found' % (wanted_name if wanted_name is not None else wanted_slug)
            )
        engine = Engine(warehouse)
        try:
            return engine[d['cube']]
        except KeyError:
            raise Http404('cube %s not found' % d['cube'])

    @classmethod
    def from_json(cls, d):
        """Build a visualization from a dict shaped like the output of to_json().

        ``drilldown_x``, ``drilldown_y`` and ``loop`` may be missing or falsy,
        in which case the corresponding dimension is left unset.
        """
        cube = cls.get_cube(d)
        representation = d['representation']
        measure = cube.measures[d['measure']]
        # to_json() always emits the drilldown keys, possibly as None, so a
        # truthiness test is needed here, not a mere presence test.
        x_name = d.get('drilldown_x')
        y_name = d.get('drilldown_y')
        drilldown_x = cube.dimensions[x_name] if x_name else None
        drilldown_y = cube.dimensions[y_name] if y_name else None
        filters = d.get('filters', {})
        loop = d.get('loop')
        if loop:
            loop = cube.dimensions[loop]
        return cls(
            cube,
            representation,
            measure,
            drilldown_x=drilldown_x,
            drilldown_y=drilldown_y,
            filters=filters,
            loop=loop,
        )

    @classmethod
    def from_form(cls, cube, form):
        """Build a visualization from the ``cleaned_data`` of a validated form.

        Every non-empty ``filter__<dimension>`` field becomes a filter on
        ``<dimension>``; measure and dimensions are looked up by name on ``cube``.
        """
        cleaned_data = form.cleaned_data
        prefix = 'filter__'
        filters = {
            kw[len(prefix):]: values
            for kw, values in cleaned_data.items()
            if values and kw.startswith(prefix)
        }
        measure = cleaned_data.get('measure', [])
        measure = measure and cube.measures[measure]
        drilldown_x = cleaned_data.get('drilldown_x')
        drilldown_x = drilldown_x and cube.dimensions[drilldown_x]
        drilldown_y = cleaned_data.get('drilldown_y')
        drilldown_y = drilldown_y and cube.dimensions[drilldown_y]
        loop = cleaned_data.get('loop')
        loop = loop and cube.dimensions[loop]
        return cls(
            cube,
            cleaned_data['representation'],
            measure,
            drilldown_x=drilldown_x,
            drilldown_y=drilldown_y,
            filters=filters,
            loop=loop,
        )

    @property
    def key(self):
        """Return an MD5 hex digest identifying the query behind this visualization.

        The digest covers the warehouse and cube names, the loop dimension,
        the filters (``None`` values skipped), the drilldown dimensions and
        the measure.
        """
        keys = [self.cube.engine.warehouse.name, self.cube.name]
        if self.loop:
            keys.append(self.loop.name)
        # Sort by filter name so the key does not depend on dict insertion order.
        for kw, value in sorted(self.filters.items(), key=lambda item: item[0]):
            if value is None:
                continue
            if isinstance(value, (dict, list, tuple)):
                # multiple values
                if isinstance(value, dict):
                    value = value.items()
                keys.append('$'.join([kw] + sorted(map(force_text, value))))
            else:
                # scalar values
                keys.append('%s$%s' % (kw, force_text(value)))
        keys += [dim.name for dim in self.drilldown]
        keys += [self.measure.name]
        # Join as text and encode once: str.join() rejects bytes items on Python 3.
        return hashlib.md5('$'.join(keys).encode('utf-8')).hexdigest()

    def data(self):
        """Execute aggregation query, list members and check None values in
        dimensions.

        Returns the list of rows produced by ``cube.query`` and, as a side
        effect, (re)builds ``self.members``.
        """
        rows = list(self.cube.query(self.filters.items(), self.drilldown, [self.measure]))
        self.members = {
            dimension: list(dimension.members(filters=self.filters.items())) for dimension in self.drilldown
        }
        seen_none = set()
        for cells in rows:
            # Keep "empty" dimension value if there is a non-zero measure associated
            if not any(measure.value for measure in cells.measures):
                continue
            for cell in cells.dimensions:
                if cell.value is None and cell.dimension not in seen_none:
                    self.members[cell.dimension].append(Member(None, cell.dimension.absent_label))
                    seen_none.add(cell.dimension)
        return rows

    def cached(self):
        """Return the rows of data(), going through the Django cache.

        The result is only stored when ``settings.BIJOE_CACHE`` is truthy.
        NOTE(review): on a cache hit data() is not run, so ``self.members``
        is left as it was -- confirm callers do not rely on it.
        """
        key = self.key
        data = cache.get(key)
        if data is None:
            data = list(self.data())
            if settings.BIJOE_CACHE:
                cache.set(key, data)
        return data

    def default_cell(self):
        """Return a MeasureCell holding the measure's default value."""
        return MeasureCell(measure=self.measure, value=self.measure.default_value)

    def table_2d(self):
        '''Layout data into 2d tables

        Returns ``((x_axis, y_axis), grid)`` where ``grid`` maps
        ``(x_id, y_id)`` to a measure cell and yields default_cell() for
        missing coordinates.
        '''
        assert len(self.drilldown) == 2
        data = self.data()
        x_axis = self.members[self.drilldown_x]
        y_axis = self.members[self.drilldown_y]
        grid = collections.defaultdict(self.default_cell)
        for cells in data:
            x_id = cells.dimensions[0].value
            y_id = cells.dimensions[1].value
            grid[(x_id, y_id)] = cells.measures[0]
        return (x_axis, y_axis), grid

    def table_1d(self):
        """Layout data along the single drilldown dimension.

        Returns ``(axis, grid)`` where ``grid`` maps a member id to a measure
        cell and yields default_cell() for missing ids.
        """
        assert len(self.drilldown) == 1
        data = self.data()
        axis = self.members[self.drilldown_x if self.drilldown_x else self.drilldown_y]
        grid = collections.defaultdict(self.default_cell)
        for cells in data:
            grid[cells.dimensions[0].value] = cells.measures[0]
        return axis, grid

    def table(self):
        """Return the visualization as a list of rows (lists of cell values).

        With two drilldowns the first row holds the x labels and each
        following row starts with a y label; "Total" lines/columns are added
        for ``count(`` measures only. With a single drilldown the table is
        laid out horizontally (x) or vertically (y). Without drilldown it is
        a single row made of the measure label and its value.
        """
        table = []
        if len(self.drilldown) == 2:
            (x_axis, y_axis), grid = self.table_2d()

            # Only compute sum of cells for count() measures
            compute_sums = self.measure.expression.lower().startswith('count(')
            compute_lines_sums = compute_sums and len(x_axis) > 1
            compute_columns_sums = compute_sums and len(y_axis) > 1
            compute_global_sum = compute_lines_sums and compute_columns_sums
            sums_columns = collections.defaultdict(int)
            sums_lines = collections.defaultdict(int)
            sum_table = 0

            for (x_id, y_id), cell in grid.items():
                value = cell.value
                if value is None:
                    continue
                if compute_columns_sums:
                    sums_columns[x_id] += value
                if compute_lines_sums:
                    sums_lines[y_id] += value
                if compute_global_sum:
                    sum_table += value

            header = [''] + [x.label for x in x_axis]
            # line sums header
            if compute_lines_sums:
                header.append(_('Total'))
            table.append(header)
            for y in y_axis:
                line = [y.label]
                line.extend('%s' % (grid[(x.id, y.id)],) for x in x_axis)
                # line sums
                if compute_lines_sums:
                    line.append(sums_lines[y.id])
                table.append(line)
            # columns sums
            if compute_columns_sums:
                footer = [_('Total')] + [sums_columns[x.id] for x in x_axis]
                if compute_global_sum:
                    footer.append(sum_table)
                table.append(footer)
        elif self.drilldown_x:
            x_axis, grid = self.table_1d()
            labels = [self.drilldown_x.label]
            values = [self.measure.label]
            for x in x_axis:
                labels.append(x.label)
                values.append('%s' % (grid[x.id],))
            table.append(labels)
            table.append(values)
        elif self.drilldown_y:
            y_axis, grid = self.table_1d()
            table.append([self.drilldown_y.label, self.measure.label])
            for y in y_axis:
                table.append([y.label, '%s' % (grid[y.id],)])
        else:
            rows = self.data()
            # Mirror json_data(): fall back to the default cell when there is no row.
            cell = rows[0].measures[0] if rows else self.default_cell()
            table.append([self.measure.label, '%s' % (cell,)])
        return table

    def javascript(self):
        """Return JavaScript declaring ``measure``, ``loop``, ``drilldown`` and ``data``."""
        # NOTE(review): the JSON is marked safe without HTML escaping; if this
        # ends up inside a <script> element, a label containing "</script>"
        # could break out of it -- confirm how templates embed the result.
        lines = [
            'var measure = %s;' % json.dumps(self.measure.to_json()),
            'var loop = %s;' % json.dumps(self.loop.to_json() if self.loop else None),
            'var drilldown = %s;' % json.dumps([dim.to_json() for dim in self.drilldown]),
            'var data = %s;' % json.dumps(self.json_data()),
        ]
        return mark_safe('\n'.join(lines))

    def json_data(self):
        """Return the data as a list of ``{'coords': [...], 'measures': [...]}`` dicts.

        Decimal values are converted to float and timedelta values to a
        (fractional) number of days. Raises NotImplementedError for more than
        two drilldown dimensions.
        """

        def cell_value(cell):
            value = cell.value
            if isinstance(value, decimal.Decimal):
                value = float(value)
            if isinstance(value, datetime.timedelta):
                value = value.days + value.seconds / 86400.0
            return value

        depth = len(self.drilldown)
        if depth == 2:
            (x_axis, y_axis), grid = self.table_2d()
            cells = (
                (['%s' % x.label, '%s' % y.label], cell_value(grid[(x.id, y.id)]))
                for x in x_axis
                for y in y_axis
            )
        elif depth == 1:
            axis, grid = self.table_1d()
            cells = ((['%s' % x.label], cell_value(grid[x.id])) for x in axis)
        elif depth == 0:
            rows = self.data()
            if rows:
                cells = [([], cell_value(rows[0].measures[0]))]
            else:
                cells = [([], self.measure.default_value)]
        else:
            raise NotImplementedError

        return [
            {
                'coords': [{'value': coord} for coord in coords],
                'measures': [{'value': value}],
            }
            for coords, value in cells
        ]

    def ods(self):
        """Return a Workbook with one sheet per iterated (sub-)visualization.

        Each sheet has the title in its first cell, followed by the rows of
        table(). For ``integer`` measures, values are converted with int()
        when possible; ``None`` is written as 0.
        """
        workbook = Workbook()
        integer_measure = self.measure.type == 'integer'

        for visualization in self:
            sheet_name = re.sub('[^a-zA-Z ]', '', visualization.table_title)
            sheet = workbook.add_sheet(sheet_name)

            sheet.write(0, 0, self.title())

            for j, row in enumerate(visualization.table()):
                for i, value in enumerate(row):
                    if integer_measure:
                        try:
                            value = int(value)
                        except (ValueError, TypeError):
                            # labels and other non-numeric cells are written as-is
                            pass
                    sheet.write(j + 1, i, 0 if value is None else value)
        return workbook

    def title(self):
        """Return the comma-separated labels of the measure, x, y and loop dimensions."""
        parts = (self.measure, self.drilldown_x, self.drilldown_y, self.loop)
        return ', '.join(part.label for part in parts if part)

    def __iter__(self):
        """Yield one visualization per member of the loop dimension.

        Each yielded copy has no loop, is filtered on a single loop member and
        carries that member's label as ``table_title``. Without a loop, the
        visualization yields itself with ``table_title`` set to its title.
        """
        if self.loop:
            members = list(self.loop.members(self.filters.items()))
            for member in members:
                table = self.copy()
                table.loop = None
                table.filters[self.loop.name] = [member.id]
                table.table_title = member.label
                yield table
        else:
            self.table_title = self.title()
            yield self