overhaul of query to table transformation (#38067)

* during query obtain dimension id and label if a different projection
is defined
* use object to materialize query results : Cells, DimensionCell,
MeasureCell
* handle stringification in the *Cell classes
* never ignore NULL dimension's values (it's detected in
Visualization.data() and added to the list of dimension members)
* sum of columns is only computed if there are more than one column
* sum of rows is only computed if there are more than one row
* full sum is only computed if there are more thant one column and more
than one row
* 1 dimension table are computed in the same maner as 2 dimensions
tables, no more discrepancies
* JSON web-service now use the same base methods table_2d() and table_1d()
as the native rendering in bijoe
* EngineDimension.members is specialized for bool dimensions (as it's
always True/False)
This commit is contained in:
Benjamin Dauvergne 2019-11-30 03:59:00 +01:00
parent 6402c7ea99
commit 72e8d4c83e
5 changed files with 4891 additions and 951 deletions

View File

@ -14,12 +14,12 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import collections
import contextlib
import logging
import itertools
import hashlib
import collections
import psycopg2
from django.core.cache import cache
@ -31,6 +31,71 @@ psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY)
class DimensionCell(collections.namedtuple('_Cell', ['dimension', 'value', 'value_label'])):
@property
def label(self):
if self.value_label:
return self.value_label
if self.value is None:
return self.dimension.absent_label
elif self.dimension.type == 'bool':
return _('Yes') if self.value else _('No')
else:
return unicode(self.value)
def __unicode__(self):
return unicode(self.label)
class MeasureCell(collections.namedtuple('_Cell', ['measure', 'value'])):
@property
def label(self):
value = self.value
if self.measure.type == 'percent':
if value is None:
return _('N/A')
else:
try:
return (u'%4.2f' % float(value)).replace('.', ',') + u' %'
except TypeError:
return _('N/A')
elif self.measure.type == 'duration':
if value is None:
return u'0'
else:
s = u''
if value.days:
s += u'%d jour(s)' % value.days
if value.seconds / 3600:
s += u' %d heure(s)' % (value.seconds / 3600)
if not s:
s = u'moins d\'1 heure'
return s
elif self.measure.type == 'bool':
if value is None:
return _('N/A')
else:
return _('Yes') if value else _('No')
elif self.measure.type == 'integer':
if value is None:
return '0'
else:
return unicode(value)
else:
raise NotImplementedError('unknown type %s' % self.measure.type)
def __unicode__(self):
return unicode(self.label)
class Cells(collections.namedtuple('Cells', ['dimensions', 'measures'])):
def __new__(cls, dimensions=[], measures=[]):
dimensions = list(dimensions)
measures = list(measures)
return super(Cells, cls).__new__(cls, dimensions, measures)
def quote(s):
return '"%s"' % s.replace('"', '\\"')
@ -72,6 +137,9 @@ class EngineDimension(object):
def members(self):
assert self.type != 'date'
if self.type == 'bool':
return [Member(id=True, label=_('Yes')), Member(id=False, label=_('No'))]
members = cache.get(self.cache_key)
if members is not None:
return members
@ -110,7 +178,7 @@ class EngineDimension(object):
for row in cursor.fetchall():
if row[0] is None:
continue
members.append(Member(*row))
members.append(Member(id=row[0], label=unicode(row[1])))
cache.set(self.cache_key, members, 600)
return members
@ -295,11 +363,11 @@ class EngineCube(object):
else:
where.append(condition)
for dimension_name in drilldown:
dimension = self.dimensions[dimension_name]
for dimension in drilldown:
joins.update(dimension.join or [])
projections.append('%s AS %s' % (dimension.value_label or dimension.value,
dimension.name))
projections.append('%s AS %s' % (dimension.value, dimension.name + '_value'))
if dimension.value_label:
projections.append('%s AS %s' % (dimension.value_label, dimension.name + '_label'))
group_by.append(dimension.group_by or dimension.value)
order_by.extend(dimension.order_by or [dimension.value])
@ -307,8 +375,7 @@ class EngineCube(object):
if order_value not in group_by:
group_by.append(order_value)
for measure_name in measures:
measure = self.get_measure(measure_name)
for measure in measures:
if measure.expression not in projections:
projections.append(measure.expression + ' AS ' + measure.name)
sql = 'SELECT ' + ', '.join(projections)
@ -334,23 +401,32 @@ class EngineCube(object):
self.engine.log.debug('%s.%s query filters=%s drilldown=%s measures=%s',
self.engine.warehouse.name, self.cube.name, filters, drilldown,
measures)
cells = []
for dimension_name in drilldown:
cells.append(self.dimensions[dimension_name])
for measure_name in measures:
cells.append(self.measures[measure_name])
with self.engine.get_cursor() as cursor:
sql = self.sql_query(filters=filters, drilldown=drilldown, measures=measures, **kwargs)
self.engine.log.debug('SQL: %s', sql)
cursor.execute(sql)
for row in cursor.fetchall():
yield [{
'name': cell.name,
'label': cell.label,
'type': cell.type,
'value': value,
'kind': 'dimension' if isinstance(cell, EngineDimension) else 'measure',
} for cell, value in zip(cells, row)]
cells = Cells()
j = 0
for dimension in drilldown:
value = row[j]
if not dimension.value_label:
value_label = None
j += 1
else:
value_label = row[j + 1]
j += 2
cells.dimensions.append(DimensionCell(
dimension=dimension,
value=value,
value_label=value_label,
))
for i, measure in enumerate(measures):
cells.measures.append(MeasureCell(
measure=measure,
value=row[j + i],
))
yield cells
def build_table_expression(self, joins, table_name, other_conditions=None):
'''Recursively build the table expression from the join tree,

View File

@ -20,16 +20,17 @@ import hashlib
import datetime
import decimal
import copy
import collections
from django.core.cache import cache
from django.utils.safestring import mark_safe
from django.utils.translation import ugettext_lazy as _
from django.utils import six
from django.core.cache import cache
from django.http import Http404
from django.conf import settings
from ..utils import get_warehouses
from ..engine import Engine
from ..engine import Engine, Member, MeasureCell
from .ods import Workbook
@ -146,38 +147,24 @@ class Visualization(object):
key = '$'.join(v.encode('utf8') for v in keys)
return hashlib.md5(key).hexdigest()
def stringified(self):
data = self.cached()
for row in data:
for cell in row:
value = cell['value']
if cell['type'] == 'percent':
try:
value = ('%4.2f' % float(value)).replace('.', ',') + u' %'
except:
value = _('Not applicable')
elif value is not None and cell['type'] == 'duration':
s = ''
if value.days:
s += '%d jour(s)' % value.days
if value.seconds / 3600:
s += ' %d heure(s)' % (value.seconds / 3600)
if not s:
s = 'moins d\'1 heure'
value = s
elif value is not None and cell['type'] == 'bool':
value = _('Yes') if value else _('No')
elif value is None and cell['type'] in ('duration', 'integer') and cell['kind'] == 'measure':
value = 0
elif value is None:
value = _('None')
cell['value'] = value
return data
def data(self):
return self.cube.query(self.filters.items(),
[dim.name for dim in self.drilldown],
[self.measure.name])
'''Execute aggregation query, list members and check None values in
dimensions.
'''
rows = list(self.cube.query(self.filters.items(),
self.drilldown,
[self.measure]))
self.members = {dimension: list(dimension.members) for dimension in self.drilldown}
seen_none = set()
for cells in rows:
# Keep "empty" dimension value if there is a non-zero measure associated
if any(measure.value for measure in cells.measures):
for cell in cells.dimensions:
if cell.value is None:
if cell.dimension not in seen_none:
self.members[cell.dimension].append(Member(None, cell.dimension.absent_label))
seen_none.add(cell.dimension)
return rows
def cached(self):
key = self.key
@ -188,63 +175,102 @@ class Visualization(object):
cache.set(key, data)
return data
def default_cell(self):
return MeasureCell(measure=self.measure, value=None)
def table_2d(self):
'''Layout data into 2d tables'''
assert len(self.drilldown) == 2
data = self.data()
x_axis = self.members[self.drilldown_x]
y_axis = self.members[self.drilldown_y]
grid = collections.defaultdict(self.default_cell)
for cells in data:
x_id = cells.dimensions[0].value
y_id = cells.dimensions[1].value
grid[(x_id, y_id)] = cells.measures[0]
return (x_axis, y_axis), grid
def table_1d(self):
assert len(self.drilldown) == 1
data = self.data()
if self.drilldown_x:
axis = self.members[self.drilldown_x]
else:
axis = self.members[self.drilldown_y]
grid = collections.defaultdict(self.default_cell)
for cells in data:
grid[cells.dimensions[0].value] = cells.measures[0]
return axis, grid
def table(self):
table = []
if len(self.drilldown) == 2:
if self.measure.type == 'integer':
default = 0
elif self.measure.type == 'duration':
default = '0 s'
elif self.measure.type == 'percent':
default = '0 %'
else:
raise NotImplementedError(self.measure.type)
(x_axis, y_axis), grid = self.table_2d()
x_labels = [x.label for x in self.drilldown_x.members]
y_labels = [y.label for y in self.drilldown_y.members]
used_x_label = set()
used_y_label = set()
# Only compute sum of cells for count() measures
compute_sums = self.measure.expression.lower().startswith('count(')
compute_lines_sums = compute_sums and len(x_axis) > 1
compute_columns_sums = compute_sums and len(y_axis) > 1
compute_global_sum = compute_lines_sums and compute_columns_sums
sums_columns = collections.defaultdict(lambda: 0)
sums_lines = collections.defaultdict(lambda: 0)
sum_table = 0
grid = {(x, y): default for x in x_labels for y in y_labels}
for coord in grid:
value = grid[coord].value
if value is not None:
if compute_columns_sums:
sums_columns[coord[0]] += value
if compute_lines_sums:
sums_lines[coord[1]] += value
if compute_global_sum:
sum_table += value
for row in self.stringified():
x_label = unicode(row[0]['value'])
y_label = unicode(row[1]['value'])
used_x_label.add(x_label)
used_y_label.add(y_label)
grid[(x_label, y_label)] = row[2]['value']
table.append([''] + [x for x in x_labels if x in used_x_label])
for y in y_labels:
if y not in used_y_label:
continue
table.append([y] + [grid[(x, y)] for x in x_labels if x in used_x_label])
if self.measure.expression.lower().startswith('count('):
# ajout des totaux horizontaux
table[0].append(_('Total'))
for row in table[1:]:
row.append(sum(v or 0 for v in row[1:]))
table.append([_('Total')])
for i in range(1, len(table[0])):
table[-1].append(sum([
row[i] or 0 for row in table[1:-1]]))
return table
table.append([''] + [x.label for x in x_axis])
# line sums header
if compute_lines_sums:
table[-1].append(_('Total'))
for y in y_axis:
table.append([y.label])
table[-1].extend(unicode(grid[(x.id, y.id)]) for x in x_axis)
# line sums
if compute_lines_sums:
table[-1].append(sums_lines[y.id])
# columns sums
if compute_columns_sums:
table.append([_('Total')] + [sums_columns[x.id] for x in x_axis])
if compute_global_sum:
table[-1].append(sum_table)
elif self.drilldown_x:
x_axis, grid = self.table_1d()
table.append([self.drilldown_x.label])
table.append([self.measure.label])
for row in self.stringified():
table[0].append(row[0]['value'])
table[1].append(row[1]['value'])
for x in x_axis:
table[0].append(x.label)
table[1].append(unicode(grid[x.id])),
elif self.drilldown_y:
y_axis, grid = self.table_1d()
table.append([self.drilldown_y.label, self.measure.label])
for row in self.stringified():
for y in y_axis:
table.append([
row[0]['value'],
row[1]['value']
y.label,
unicode(grid[y.id]),
])
else:
value = self.stringified()[0][0]['value']
table.append([self.measure.label, value])
table.append([self.measure.label, unicode(self.data()[0].measures[0])])
for row in table:
for cell in row:
assert cell != 's'
return table
def javascript(self):
@ -257,32 +283,47 @@ class Visualization(object):
def json_data(self):
json_data = []
for row in self.data():
coords = []
for cell in row[:len(self.drilldown)]:
coords.append(cell)
measures = []
for cell in row[len(self.drilldown):]:
if isinstance(cell['value'], decimal.Decimal):
cell['value'] = float(cell['value'])
if isinstance(cell['value'], datetime.timedelta):
cell['value'] = cell['value'].days + cell['value'].seconds / 86400.
measures.append(cell)
json_data.append({'coords': coords, 'measures': measures})
def cell_value(cell):
value = cell.value
if isinstance(value, decimal.Decimal):
value = float(value)
if isinstance(value, datetime.timedelta):
value = value.days + value.seconds / 86400.
return value
if len(self.drilldown) == 2:
(x_axis, y_axis), grid = self.table_2d()
cells = (([x.label, y.label], cell_value(grid[(x.id, y.id)])) for x in x_axis for y in y_axis)
elif len(self.drilldown) == 1:
axis, grid = self.table_1d()
cells = (([x.label], cell_value(grid[x.id])) for x in axis)
else:
raise NotImplementedError
for coords, value in cells:
json_data.append({
'coords': [{'value': coord} for coord in coords],
'measures': [{'value': value}],
})
return json_data
def ods(self):
workbook = Workbook()
full_title = self.title()
for table in self:
sheet_name = re.sub('[^a-zA-Z ]', '', table.table_title)
for visualization in self:
sheet_name = re.sub('[^a-zA-Z ]', '', visualization.table_title)
sheet = workbook.add_sheet(sheet_name)
sheet.write(0, 0, self.title())
sheet.write(0, 0, full_title)
for j, row in enumerate(table.table()):
for j, row in enumerate(visualization.table()):
for i, value in enumerate(row):
if self.measure.type == 'integer':
try:
value = int(value)
except ValueError:
pass
sheet.write(j + 1, i, 0 if value is None else value)
return workbook
@ -301,15 +342,11 @@ class Visualization(object):
def __iter__(self):
if self.loop:
members = list(self.loop.members)
d = list(self.cube.query(self.filters.items(), [self.loop.name],
[self.measure.name]))
names = [unicode(x[0]['value']) for x in d]
members = [m for m in members if unicode(m.label) in names]
for member in members:
table = self.copy()
table.loop = None
table.filters[self.loop.name] = [member.id]
table.table_title = unicode(member.label)
table.table_title = member.label
yield table
else:
self.table_title = self.title()

View File

@ -256,15 +256,16 @@ class VisualizationGeoJSONView(generics.GenericAPIView):
visualization.measure = visualization.cube.measures['geolocation']
drilldown = visualization.drilldown
geojson = []
for row in visualization.data():
properties = {}
for dim in row[:len(drilldown)]:
properties[dim['label']] = dim['value']
for cell in row[:len(drilldown)]:
properties[cell.label] = unicode(cell)
geojson.append({
'type': 'Feature',
'geometry': {
'type': 'MultiPoint',
'coordinates': row[len(drilldown)]['value'] or [],
'coordinates': [unicode(cell) for cell in row[len(drilldown)]]
},
'properties': properties,
})
@ -275,13 +276,14 @@ class VisualizationJSONView(generics.GenericAPIView):
permission_classes = ()
queryset = models.Visualization.objects.all()
def get(self, request, pk, format=None):
def cell_value(cell):
if cell['type'] == 'duration' and cell['value'] is not None:
return cell['value'].total_seconds()
return cell['value']
if cell.measure.type == 'duration' and cell.value is not None:
return cell.value.total_seconds()
return cell.value
def labels(axis):
return [x.label.strip() for x in axis]
instance = self.get_object()
loop = []
@ -289,37 +291,24 @@ class VisualizationJSONView(generics.GenericAPIView):
for visualization in all_visualizations:
drilldowns = visualization.drilldown
if len(drilldowns) == 2:
x_labels = [x.label for x in visualization.drilldown_x.members]
y_labels = [y.label for y in visualization.drilldown_y.members]
used_x_labels = OrderedDict()
used_y_labels = OrderedDict()
default = 0
grid = {(x, y): default for x in x_labels for y in y_labels}
for row in visualization.data():
x_label = unicode(row[0]['value'])
y_label = unicode(row[1]['value'])
used_x_labels[x_label] = True
used_y_labels[y_label] = True
grid[(x_label, y_label)] = cell_value(row[2])
(x_axis, y_axis), grid = visualization.table_2d()
axis = {
'x_labels': labels(x_axis),
'y_labels': labels(y_axis),
}
data = []
for y in used_y_labels.keys():
data.append([grid[(x, y)] for x in used_x_labels.keys()])
axis = {
'x_labels': [x.strip() for x in used_x_labels.keys()],
'y_labels': [x.strip() for x in used_y_labels.keys()]
}
for y in y_axis:
data.append([cell_value(grid[(x.id, y.id)]) for x in x_axis])
elif len(drilldowns) == 1:
table = list(visualization.data())
axis_data = [force_text(x[0]['value'] or '').strip() for x in table]
data = [cell_value(x[1]) for x in table]
x_axis, grid = visualization.table_1d()
if visualization.drilldown_x:
axis = {'x_labels': axis_data}
axis = {'x_labels': labels(x_axis)}
else:
axis = {'y_labels': axis_data}
axis = {'y_labels': labels(x_axis)}
data = [cell_value(grid[x.id]) for x in x_axis]
elif len(drilldowns) == 0:
data = cell_value(list(list(visualization.data())[0])[0])
data = cell_value(visualization.data()[0].measures[0])
axis = {}
loop.append({
'data': data,

File diff suppressed because it is too large Load Diff

View File

@ -45,15 +45,15 @@ def test_truncated_previous_year_range(schema1, app, admin, freezer):
freezer.move_to('2019-01-01 01:00:00')
response = form.submit('visualize')
assert get_table(response) == [
['', 'Total'],
['Total', '0']
['', 'janvier', u'f\xe9vrier', 'mars', 'avril', 'mai', 'juin', 'juillet', u'ao\xfbt', 'Total'],
['2017', '0', '0', '0', '0', '0', '0', '0', '0', '0'],
]
freezer.move_to('2018-01-01 01:00:00')
response = form.submit('visualize')
assert get_table(response) == [
['', 'janvier', u'f\xe9vrier', 'mars', 'avril', 'mai', 'juin', 'juillet', u'ao\xfbt', 'Total'],
['2017', '10', '1', '1', '1', '1', '1', '1', '1', '17'],
['Total', '10', '1', '1', '1', '1', '1', '1', '1', '17'],
]
@ -66,10 +66,10 @@ def test_boolean_dimension(schema1, app, admin):
form.set('measure', 'simple_count')
form.set('drilldown_x', 'boolean')
response = form.submit('visualize')
assert get_table(response) == [['Boolean', 'Non', 'Oui'], ['number of rows', '9', '8']]
assert get_table(response) == [['Boolean', 'Oui', 'Non'], ['number of rows', '8', '9']]
form.set('filter__boolean', [o[0] for o in form.fields['filter__boolean'][0].options if o[2] == 'Oui'][0])
response = form.submit('visualize')
assert get_table(response) == [['Boolean', 'Oui'], ['number of rows', '8']]
assert get_table(response) == [['Boolean', 'Oui', 'Non'], ['number of rows', '8', '0']]
def test_string_dimension(schema1, app, admin):
@ -84,7 +84,7 @@ def test_string_dimension(schema1, app, admin):
assert get_table(response) == [['String', 'a', 'b', 'c', 'Aucun(e)'], ['number of rows', '11', '2', '3', '1']]
form.set('filter__string', ['a', 'b', '__none__'])
response = form.submit('visualize')
assert get_table(response) == [['String', 'a', 'b', 'Aucun(e)'], ['number of rows', '11', '2', '1']]
assert get_table(response) == [['String', 'a', 'b', 'c', 'Aucun(e)'], ['number of rows', '11', '2', '0', '1']]
def test_item_dimension(schema1, app, admin):
@ -103,7 +103,11 @@ def test_item_dimension(schema1, app, admin):
]
form.set('filter__outersubcategory', ['__none__'])
response = form.submit('visualize')
assert get_table(response) == [['Outer SubCategory', 'Aucun(e)'], ['number of rows', '1']]
assert get_table(response) == [
['Outer SubCategory', u'sub\xe94', u'sub\xe95', u'sub\xe96', u'sub\xe98',
u'sub\xe99', u'sub\xe97', u'sub\xe92', u'sub\xe93', u'sub\xe91', 'Aucun(e)'],
['number of rows', '0', '0', '0', '0', '0', '0', '0', '0', '0', '1']
]
def test_yearmonth_drilldown(schema1, app, admin):
@ -160,13 +164,12 @@ def test_truncated_previous_year_range_on_datetime(schema1, app, admin, freezer)
freezer.move_to('2019-01-01 01:00:00')
response = form.submit('visualize')
assert get_table(response) == [
['', 'Total'],
['Total', '0']
['', 'janvier', u'f\xe9vrier', 'mars', 'avril', 'mai', 'juin', 'juillet', u'ao\xfbt', 'Total'],
['2017', '0', '0', '0', '0', '0', '0', '0', '0', '0'],
]
freezer.move_to('2018-01-01 01:00:00')
response = form.submit('visualize')
assert get_table(response) == [
['', 'janvier', u'f\xe9vrier', 'mars', 'avril', 'mai', 'juin', 'juillet', u'ao\xfbt', 'Total'],
['2017', '10', '1', '1', '1', '1', '1', '1', '1', '17'],
['Total', '10', '1', '1', '1', '1', '1', '1', '1', '17'],
]