diff --git a/MANIFEST.in b/MANIFEST.in index 0d8124e..096159e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,5 @@ include VERSION +include tox.ini +recursive-include tests *.py include create_dates.sql +recursive-include bijoe/templates *.html diff --git a/README.rst b/README.rst index 32ad7a9..c9c2381 100644 --- a/README.rst +++ b/README.rst @@ -2,22 +2,252 @@ BI for Publik ============= w.c.s. OLAP ------------ +~~~~~~~~~~~ Tool to export w.c.s. data in a database with star schema for making an OLAP -cube.:: +cube. - usage: wcs-olap --url URL [-h] --orig ORIG --key KEY - --pg-dsn PG_DSN +:: + + usage: wcs-olap [--no-feed] [-a | --url URL] [-h] [--orig ORIG] [--key KEY] + [--pg-dsn PG_DSN] [--schema SCHEMA] + [config_path] Export W.C.S. data as a star schema in a postgresql DB + positional arguments: + config_path + optional arguments: - --url URL url of the w.c.s. instance - -h, --help show this help message and exit - --orig ORIG origin of the request for signatures - --key KEY HMAC key for signatures - --pg-dsn PG_DSN Psycopg2 DB DSN + --no-feed only produce the model + -a, --all synchronize all wcs + --url URL url of the w.c.s. instance + -h, --help show this help message and exit + --orig ORIG origin of the request for signatures + --key KEY HMAC key for signatures + --pg-dsn PG_DSN Psycopg2 DB DSN + --schema SCHEMA schema name + +Bi-Joe +~~~~~~ + +BI Joe is a library and a Django application to simplify querying a postgresql +database containing a star schema with BI annotations in order to +easily produce BI dashboards + +It's inspired by the Cubes project. + +Feature +~~~~~~~ + +* use PostgreSQL database as datastore, +* declare joins to define star schemas of your cubes +* declare dimensions as SQL query or exepression defining, label, group by, + ordering or member list. 
+* declare measure as SQL expression using aggregate functions + +Missing features +~~~~~~~~~~~~~~~~ + +* hierarchical dimensions +* measure necessitating a rollup (percentage based on count() of facts at an upper + level) + +Model +~~~~~ + +You declare your model using JSON files, those JSON files are targeted by a list +of glob patterns in the Django setting BIJOE_SCHEMAS. + +.. code:: python + + BIJOE_SCHEMAS = ['/var/lib/bijoe/cubes/*.model'] + +The JSON model files must conform to this schema: + +* name: technical identifier of the model, a short string without space is + preferred + +* label: any string describing the model + +* pg_dsn: string describing the connection to PostgreSQL, as expected by + psycopg2, ex.: `"dbname=olap_db user=admin password=xxx"` + +* search_path: search path to set if relations are not all in the public + schema, + +* cubes: the list of cube descriptors, + + * name: technical identifier of the cube, same remark as for models, + + * label: as for model, + + * fact_table: name of the table storing facts + + * key: column of the table identifying individual facts + + * joins: list of equality joins, all joins are RIGHT OUTER JOINS, tables are + cross joined when a drill involves dimensions using multiple joins. + + * name: SQL identifier for naming the join, + + * table: name of the relation being joined, + + * master: table and column indicating the left part of the equality + condition for the join, you can use `mydim_id` to reference the fact + table or `otherjoin.mydim_id` to reference another join. + + * detail: name of the column on the joined table for the equality condition, + + * kind: type of join, must be `inner`, `left` or `right`, default is right. 
+ + * dimensions: list of dimension descriptors, + + * name: technical identifier of the dimension, it will be used to name the + dimension in the API, + + * label: human description for the dimension, used in UIs, + + * join: list of join names, indicate that some joins must be used when using + this dimension, + + * type: indicate the type of the dimension, numerical, time-like, + geographical, duration, etc.. + + * value: SQL expression giving the value for the dimension, + it can be different than the value used for filtering or grouping, + * sql_filter: SQL expression that will be used in the SQL template + ` IN (...)` when filtering along a dimension, + + * value_label: SQL expression giving the shown value + + * group_by: SQL expression to group facts along a dimension, default is to + use value + + * order_by: SQL expression to order dimension values, default is to use + value + + * measures: list of measure descriptors + + * name: as for models, + + * label: as for models, + + * type: type of the measure: integer or duration, + + * expression: SQL expression indicating how to compute the aggregate, + ex.: `avg(delay)`, `count(product_type)`. + +Example ++++++++ + +.. 
code:: json + + { + "name" : "cam", + "label" : "cam", + "pg_dsn" : "dbname=wcs-olap", + "search_path" : [ + "cam", + "public" + ], + "cubes" : [ + { + "name" : "all_formdata", + "label" : "Tous les formulaires", + "fact_table" : "formdata", + "key" : "id", + "joins" : [ + { + "name" : "formdef", + "master" : "formdef_id", + "detail" : "id", + "table" : "formdef" + }, + { + "name" : "dates", + "master" : "receipt_time", + "detail" : "date", + "table" : "dates" + } + ], + "dimensions" : [ + { + "name" : "formdef", + "label" : "formulaire", + "type" : "integer", + "join" : ["formdef"], + "value" : "formdef.id", + "value_label" : "formdef.label" + }, + { + "join": [ + "dates" + ], + "label": "date de la demande", + "name": "receipt_time", + "type": "date", + "value": "receipt_time.date" + } + ], + "measures" : [ + { + "name": "count", + "label": "nombre de demandes", + "type": "integer", + "expression": "count({fact_table}.id)" + }, + { + "name" : "avg_endpoint_delay", + "label" : "Délai de traitement", + "type" : "duration", + "expression" : "avg(endpoint_delay)" + } + ] + } + ] + } + +API +~~~ + +Model description is handled by `bijoe.schemas` and model querying by +`bijoe.engine`. + +bijoe.schemas.Warehouse ++++++++++++++++++++++++ + +`Warehouse` is the main class to manipulate models, it has two class methods: + +* `from_json(d)` which transforms a dictionary, obtained e.g. by parsing + some JSON file, into a Warehouse object. + +* `to_json()` which transforms a Warehouse object into a JSON compatible + dictionary. + +bijoe.engine.Engine ++++++++++++++++++++ + +`Engine(warehouse)` is the entry-point for querying a model, you get an +`EngineCube` object by indexing the engine with the name of a cube. + +.. code:: python + + cube = Engine(warehouse)['mycube'] + +You can query your cube using the `query` method. + +.. 
code:: python + + cube.query(filters=[('year', [2013, 2014, 2015])], + drilldown=['year', 'product'], + measures=['count', 'avg_sale']) + +It returns a sequence of rows whose elements are the values of the drilldown +dimensions in the same order as in the query followed by the values of the +measures also in the same ordre as the query. + +The `count` measure is a special measure which is always present whose +expression is always `count({fact_table}.{key})` where `fact_table` and `key` +are the corresponding attributes of the cube. diff --git a/bijoe-ctl b/bijoe-ctl new file mode 100755 index 0000000..65ea9ae --- /dev/null +++ b/bijoe-ctl @@ -0,0 +1,10 @@ +#!/usr/bin/env python +import os +import sys + +if __name__ == "__main__": + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "bijoe.settings") + + from django.core.management import execute_from_command_line + + execute_from_command_line(sys.argv) diff --git a/bijoe/__init__.py b/bijoe/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bijoe/engine.py b/bijoe/engine.py new file mode 100644 index 0000000..799bbbc --- /dev/null +++ b/bijoe/engine.py @@ -0,0 +1,216 @@ +import logging + +import collections +import psycopg2 + + +def to_tuple(cur, values): + return cur.mogrify(', '.join(['%s'] * len(values)), values) + + +Member = collections.namedtuple('Member', ['id', 'label']) + + +class EngineDimension(object): + def __init__(self, engine, engine_cube, dimension): + self.engine = engine + self.engine_cube = engine_cube + self.dimension = dimension + + def __getattr__(self, name): + return getattr(self.dimension, name) + + @property + def members(self): + assert self.dimension.type != 'date' + cursor = self.engine.get_cursor() + if self.dimension.join: + join = self.engine_cube.get_join(self.dimension.join[-1]) + sql = 'SELECT %s AS value, %s::text AS label FROM %s AS %s ORDER BY %s' % ( + self.value, self.value_label or self.value, join.table, join.name, self.order_by or + self.value) + else: + 
sql = 'SELECT %s AS value, %s::text AS label FROM {fact_table} ORDER BY %s' % ( + self.value, self.value_label or self.value, self.self.order_by or self.value) + sql = sql.format(fact_table=self.engine_cube.fact_table) + self.engine.log.debug('SQL: %s', sql) + cursor.execute(sql) + for row in cursor.fetchall(): + yield Member(*row) + + +class EngineMeasure(object): + def __init__(self, engine, engine_cube, measure): + self.engine = engine + self.engine_cube = engine_cube + self.measure = measure + + def __getattr__(self, name): + return getattr(self.measure, name) + + +class ProxyList(object): + def __init__(self, engine, engine_cube, attribute, cls): + self.engine = engine + self.engine_cube = engine_cube + self.attribute = attribute + self.cls = cls + + def __iter__(self): + return (self.cls(self.engine, self.engine_cube, o) + for o in getattr(self.engine_cube.cube, self.attribute)) + + def __getitem__(self, name): + for o in getattr(self.engine_cube.cube, self.attribute): + if o.name == name: + return self.cls(self.engine, self.engine_cube, o) + raise KeyError + + +class ProxyListDescriptor(object): + def __init__(self, attribute, cls): + self.attribute = attribute + self.cls = cls + + def __get__(self, obj, t=None): + return ProxyList(obj.engine, obj, self.attribute, self.cls) + + +class EngineCube(object): + dimensions = ProxyListDescriptor('all_dimensions', EngineDimension) + measures = ProxyListDescriptor('measures', EngineMeasure) + + def __init__(self, warehouse, cube): + self.engine = warehouse + self.cube = cube + + def __getattr__(self, name): + return getattr(self.cube, name) + + def sql_query(self, filters, drilldown, measures, **kwargs): + cursor = self.engine.get_cursor() + + projections = [] + joins = set() + where = [] + group_by = [] + order_by = [] + + for dimension_name, values in filters: + dimension = self.cube.get_dimension(dimension_name) + assert dimension.filter + condition, values = dimension.filter(values) + condition = 
cursor.mogrify(condition, values) + where.append(condition) + joins.update(dimension.join) + + for dimension_name in drilldown: + dimension = self.cube.get_dimension(dimension_name) + joins.update(dimension.join) + projections.append('%s AS %s' % (dimension.value_label or dimension.value, + dimension.name)) + group_by.append(dimension.group_by or dimension.value) + order_by.append(dimension.order_by or dimension.value) + + for measure_name in measures: + measure = self.cube.get_measure(measure_name) + if measure.expression not in projections: + projections.append(measure.expression + ' AS ' + measure.name) + sql = 'SELECT ' + ', '.join(projections) + table_expression = ' %s' % self.cube.fact_table + if joins: + join_tree = {} + for join_name in joins: + join = self.cube.get_join(join_name) + if '.' in join.master: + master_table = join.master.split('.', 1)[0] + else: + master_table = self.fact_table + join_tree.setdefault(master_table, {}).setdefault(join.kind, {})[join.name] = join + + def build_table_expression(table_name, alias=None, top=True): + sql = table_name + if alias: + sql += ' AS %s' % alias + add_paren = False + for kind in ['left', 'inner', 'right']: + joins = join_tree.get(table_name, {}).get(kind) + if not joins: + continue + add_paren = True + join_kinds = { + 'inner': 'INNER JOIN', + 'left': 'LEFT OUTER JOIN', + 'right': 'RIGHT OUTER JOIN', + } + sql += ' %s ' % join_kinds[kind] + sub_joins = [] + conditions = [] + for join_name, join in joins.iteritems(): + sub_joins.append( + build_table_expression(join.table, join.name, top=False)) + conditions.append('%s.%s = %s.%s' % (alias or table_name, + join.master.split('.')[-1], + join.name, join.detail)) + sub_sql = ' CROSS JOIN '.join(sub_joins) + if len(sub_joins) > 1: + sub_sql = '(%s)' % sub_sql + sql += sub_sql + sql += ' ON %s' % ' AND '.join(conditions) + if not top and add_paren: + sql = '(%s)' % sql + return sql + + table_expression = build_table_expression(self.fact_table) + sql += ' FROM %s' 
% table_expression + where_conditions = 'true' + if where: + where_conditions = ' AND '.join(where) + sql += ' WHERE %s' % where_conditions + if group_by: + sql += ' GROUP BY %s' % ', '.join(group_by) + if order_by: + sql += ' ORDER BY %s' % ', '.join(order_by) + sql = sql.format(fact_table=self.cube.fact_table, + table_expression=table_expression, + where_conditions=where_conditions) + sql = sql.format(fact_table=self.cube.fact_table, + table_expression=table_expression, + where_conditions=where_conditions) + return sql + + def query(self, filters, drilldown, measures, **kwargs): + self.engine.log.debug('%s.%s query filters=%s drilldown=%s measures=%s', + self.engine.warehouse.name, self.cube.name, filters, drilldown, + measures) + cells = [] + for dimension_name in drilldown: + cells.append(self.dimensions[dimension_name]) + for measure_name in measures: + cells.append(self.measures[measure_name]) + cursor = self.engine.get_cursor() + sql = self.sql_query(filters=filters, drilldown=drilldown, measures=measures, **kwargs) + self.engine.log.debug('SQL: %s', sql) + cursor.execute(sql) + for row in cursor.fetchall(): + yield zip(cells, row) + + +class Engine(object): + def __init__(self, warehouse): + self.warehouse = warehouse + self.log = logging.getLogger(__name__) + + def __getitem__(self, name): + return EngineCube(self, self.warehouse.get_cube(name)) + + def __getattr__(self, name): + return getattr(self.warehouse, name) + + def get_cursor(self): + connection = psycopg2.connect( + self.warehouse.pg_dsn) + cursor = connection.cursor() + search_path = ', '.join(['"%s"' % namespace for namespace in self.warehouse.search_path]) + cursor.execute('SET SEARCH_PATH = %s' % search_path) + return cursor diff --git a/bijoe/forms.py b/bijoe/forms.py new file mode 100644 index 0000000..3aada31 --- /dev/null +++ b/bijoe/forms.py @@ -0,0 +1,71 @@ +import datetime + +from django import forms +from django.utils.translation import ugettext as _ + +from django_select2.forms 
import Select2MultipleWidget + + +class DateRangeWidget(forms.MultiWidget): + def __init__(self, attrs=None): + attrs2 = {'type': 'date'} + if attrs: + attrs2.update(attrs) + widgets = ( + forms.DateInput(attrs=attrs2.copy()), + forms.DateInput(attrs=attrs2.copy()), + ) + super(DateRangeWidget, self).__init__(widgets, attrs=attrs) + + def decompress(self, value): + if not value: + return None, None + return value + + +class DateRangeField(forms.MultiValueField): + widget = DateRangeWidget + + def __init__(self, *args, **kwargs): + # Or define a different message for each field. + fields = ( + forms.DateField(required=False), + forms.DateField(required=False), + ) + super(DateRangeField, self).__init__(fields=fields, require_all_fields=False, *args, + **kwargs) + + def compress(self, values): + return values + + +class CubeForm(forms.Form): + def __init__(self, *args, **kwargs): + self.cube = cube = kwargs.pop('cube') + super(CubeForm, self).__init__(*args, **kwargs) + + # filters + for dimension in cube.dimensions: + if not dimension.filter: + continue + field_name = 'filter__%s' % dimension.name + if dimension.type is datetime.date: + self.fields[field_name] = DateRangeField( + label=dimension.label.capitalize(), required=False) + else: + self.fields[field_name] = forms.MultipleChoiceField( + label=dimension.label.capitalize(), + choices=dimension.members, + required=False, + widget=Select2MultipleWidget()) + + # group by + choices = [(dimension.name, dimension.label) for dimension in cube.dimensions + if dimension.type not in (datetime.datetime, datetime.date)] + self.fields['drilldown'] = forms.MultipleChoiceField( + label=_('Group by'), choices=choices, required=False, widget=Select2MultipleWidget()) + + # measures + choices = [(measure.name, measure.label) for measure in cube.measures] + self.fields['measures'] = forms.MultipleChoiceField( + label=_('Measures'), choices=choices, widget=Select2MultipleWidget()) diff --git a/bijoe/schemas.py b/bijoe/schemas.py 
new file mode 100644 index 0000000..3679a17 --- /dev/null +++ b/bijoe/schemas.py @@ -0,0 +1,297 @@ +# -*- coding: utf-8 -*- +import datetime +import decimal +import collections + + +TYPE_MAP = { + 'duration': datetime.timedelta, + 'date': datetime.date, + 'integer': int, + 'decimal': decimal.Decimal, + 'percent': float, +} + + +class SchemaError(Exception): + pass + + +def type_to_json(t): + for k, v in TYPE_MAP.items(): + if t is v: + return k + + +def type_cast(t): + if isinstance(t, type): + return t + else: + return TYPE_MAP[t] +type_cast.to_json = type_to_json + + +class Base(object): + __types__ = {} + + def __init__(self, **kwargs): + for k, v in kwargs.iteritems(): + setattr(self, k, v) + + @classmethod + def slots(cls): + s = set() + for cls in reversed(cls.__mro__): + if hasattr(cls, '__slots__'): + s.update(cls.__slots__) + return s + + @classmethod + def types(cls): + d = {} + for cls in reversed(cls.__mro__): + if hasattr(cls, '__types__'): + d.update(cls.__types__) + return d + + @classmethod + def from_json(cls, d): + assert hasattr(d, 'keys') + slots = cls.slots() + assert set(d.keys()) <= set(slots), \ + 'given keys %r does not match %s.__slots__: %r' % (d.keys(), cls.__name__, slots) + types = cls.types() + + kwargs = {} + for key in slots: + assert key in d or hasattr(cls, key), \ + '%s.%s is is a mandatory attribute' % (cls.__name__, key) + if not key in d: + continue + value = d[key] + if key in types: + kls = types[key] + if isinstance(kls, list): + kls = kls[0] + if hasattr(kls, 'from_json'): + value = [kls.from_json(v) for v in value] + else: + value = [kls(v) for v in value] + elif hasattr(kls, 'from_json'): + value = kls.from_json(value) + else: + value = kls(value) + kwargs[key] = value + return cls(**kwargs) + + def to_json(self): + d = {} + types = self.types() + for attr in self.slots(): + try: + v = getattr(self, attr) + except AttributeError: + pass + else: + if attr in types and hasattr(types[attr], 'to_json'): + v = 
types[attr].to_json(v) + if isinstance(v, list): + v = [x.to_json() if hasattr(x, 'to_json') else x for x in v] + d[attr] = v + return d + + def __repr__(self): + kwargs = ['%s=%r' % (key, getattr(self, key)) for key in self.slots() if hasattr(self, key)] + return '<%s %s>' % (self.__class__.__name__, ' '.join(kwargs)) + + +class Measure(Base): + __slots__ = ['name', 'label', 'type', 'expression'] + __types__ = { + 'name': str, + 'label': unicode, + 'type': type_cast, + 'expression': str, + } + + +class Dimension(Base): + __slots__ = ['name', 'label', 'type', 'join', 'value', 'value_label', + 'order_by', 'group_by', 'filter_in_join', 'filter'] + __types__ = { + 'name': str, + 'label': unicode, + 'type': type_cast, + 'join': [str], + 'value': str, + 'value_label': str, + 'order_by': str, + 'group_by': str, + 'filter': bool, + } + + label = None + value_label = None + order_by = None + group_by = None + join = None + filter = True + + @property + def dimensions(self): + if self.type is datetime.date: + return [ + self, + Dimension( + label=u'année (%s)' % self.label, + + name=self.name + '__year', + type='integer', + join=self.join, + value='EXTRACT(year from %s)::integer' % self.value, + filter=False), + Dimension( + label=u'mois (%s)' % self.label, + name=self.name + '__month', + type='integer', + join=self.join, + value='EXTRACT(month from %s)' % self.value, + value_label='to_char(date_trunc(\'month\', %s), \'TMmonth\')' % self.value, + group_by='EXTRACT(month from %s), ' + 'to_char(date_trunc(\'month\', %s), \'TMmonth\')' + % (self.value, self.value), + filter=False), + Dimension( + label=u'jour de la semaine (%s)' % self.label, + name=self.name + '__dow', + type='integer', + join=self.join, + value='EXTRACT(dow from %s)' % self.value, + value_label='to_char(date_trunc(\'week\', current_date)::date ' + '+ EXTRACT(dow from %s)::integer, \'TMday\')' % self.value, + filter=False) + ] + return [self] + + def filter(self, filter_values): + if self.type is 
datetime.date: + assert len(filter_values) == 2 + filters = [] + values = [] + try: + if filter_values[0]: + filters.append('%s >= %%s' % self.value) + values.append(filter_values[0]) + except IndexError: + pass + try: + if filter_values[1]: + filters.append('%s <= %%s' % self.value) + values.append(filter_values[1]) + except IndexError: + pass + return ' AND '.join(filters), values + else: + if not filter_values: + return '', [] + if self.type == 'integer': + values = map(int, filter_values) + else: + values = filter_values + s = ', '.join(['%s'] * len(values)) + return '%s IN (%s)' % (self.value, s), values + + +def join_kind(kind): + if kind not in ('inner', 'left', 'right'): + raise ValueError('bad joind kind: %s' % kind) + return kind + + +class Join(Base): + __slots__ = ['name', 'table', 'master', 'detail', 'kind'] + __types__ = { + 'name': str, + 'table': str, + 'master': str, + 'detail': str, + 'kind': join_kind, + } + + kind = 'right' + + +class Cube(Base): + __slots__ = ['name', 'label', 'fact_table', 'key', 'joins', 'dimensions', 'measures'] + __types__ = { + 'name': str, + 'label': unicode, + 'fact_table': str, + 'key': str, + 'joins': [Join], + 'dimensions': [Dimension], + 'measures': [Measure], + } + + joins = () + dimensions = () + measures = () + + def check(self): + names = collections.Counter() + names.update(join.name for join in self.joins) + names.update(dimension.name for dimension in self.dimensions) + names.update(measure.name for measure in self.measures) + duplicates = [k for k, v in names.iteritems() if v > 1] + if duplicates: + raise SchemaError( + 'More than one join, dimension or measure with name(s) %s' % ', '.join(duplicates)) + + @property + def all_dimensions(self): + for dimension in self.dimensions: + for sub_dimension in dimension.dimensions: + yield sub_dimension + + def get_dimension(self, name): + for dimension in self.dimensions: + for sub_dimension in dimension.dimensions: + if sub_dimension.name == name: + return 
sub_dimension + raise KeyError + + def get_join(self, name): + for join in self.joins: + if join.name == name: + return join + raise KeyError + + def get_measure(self, name): + for measure in self.measures: + if measure.name == name: + return measure + raise KeyError + + +class Warehouse(Base): + __slots__ = ['name', 'label', 'pg_dsn', 'search_path', 'cubes'] + __types__ = { + 'name': str, + 'label': unicode, + 'pg_dsn': str, + 'search_path': [str], + 'cubes': [Cube], + 'search_path': [str], + } + + def check(self): + names = collections.Counter(cube.name for cube in self.cubes) + duplicates = [k for k, v in names.iteritems() if v > 1] + if duplicates: + raise SchemaError('More than one cube with name(s) %s' % ', '.join(duplicates)) + + def get_cube(self, name): + for cube in self.cubes: + if cube.name == name: + return cube + raise KeyError diff --git a/bijoe/settings.py b/bijoe/settings.py new file mode 100644 index 0000000..8ac128e --- /dev/null +++ b/bijoe/settings.py @@ -0,0 +1,123 @@ +""" +Django settings for bijoe project. + +For more information on this file, see +https://docs.djangoproject.com/en/1.7/topics/settings/ + +For the full list of settings and their values, see +https://docs.djangoproject.com/en/1.7/ref/settings/ +""" + +from django.conf import global_settings + +# Build paths inside the project like this: os.path.join(BASE_DIR, ...) +import os +BASE_DIR = os.path.dirname(os.path.dirname(__file__)) + + +# Quick-start development settings - unsuitable for production +# See https://docs.djangoproject.com/en/1.7/howto/deployment/checklist/ + +# SECURITY WARNING: keep the secret key used in production secret! +SECRET_KEY = '-4h1(4b!p-8)or!_!iw%&e89+$6(_yf#b^2e+=fc$e2)+h16m6' + +# SECURITY WARNING: don't run with debug turned on in production! 
+DEBUG = True + +TEMPLATE_DEBUG = True + +ALLOWED_HOSTS = [] + + +# Application definition + +INSTALLED_APPS = ( + 'django.contrib.admin', + 'django.contrib.auth', + 'django.contrib.contenttypes', + 'django.contrib.sessions', + 'django.contrib.messages', + 'django.contrib.staticfiles', + 'xstatic.pkg.chartnew_js', + 'django_select2', + 'gadjo', + 'bijoe', +) + +MIDDLEWARE_CLASSES = ( + 'django.contrib.sessions.middleware.SessionMiddleware', + 'django.middleware.common.CommonMiddleware', + 'django.middleware.csrf.CsrfViewMiddleware', + 'django.contrib.auth.middleware.AuthenticationMiddleware', + 'django.contrib.auth.middleware.SessionAuthenticationMiddleware', + 'django.contrib.messages.middleware.MessageMiddleware', + 'django.middleware.clickjacking.XFrameOptionsMiddleware', +) + +STATICFILES_FINDERS = global_settings.STATICFILES_FINDERS + ('gadjo.finders.XStaticFinder',) + + +ROOT_URLCONF = 'bijoe.urls' + +WSGI_APPLICATION = 'bijoe.wsgi.application' + + +# Database +# https://docs.djangoproject.com/en/1.7/ref/settings/#databases + +DATABASES = { + 'default': { + 'ENGINE': 'django.db.backends.sqlite3', + 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'), + } +} + +# Internationalization +# https://docs.djangoproject.com/en/1.7/topics/i18n/ + +LANGUAGE_CODE = 'en-us' + +TIME_ZONE = 'UTC' + +USE_I18N = True + +USE_L10N = True + +USE_TZ = True + + +# Static files (CSS, JavaScript, Images) +# https://docs.djangoproject.com/en/1.7/howto/static-files/ + +STATIC_URL = '/static/' + +# BiJoe settings + +LOGGING = { + 'version': 1, + 'disable_existing_loggers': True, + 'formatters': { + 'verbose': { + 'format': '[%(asctime)s] %(levelname)s %(name)s.%(funcName)s: %(message)s', + 'datefmt': '%Y-%m-%d %a %H:%M:%S' + }, + }, + 'handlers': { + 'console': { + 'level': 'DEBUG', + 'class': 'logging.StreamHandler', + 'formatter': 'verbose', + }, + }, + 'loggers': { + 'bijoe': { + 'handlers': ['console'], + 'level': 'DEBUG', + }, + }, +} + +BIJOE_SCHEMAS = ['./*.model'] + +if 
'BIJOE_SETTING_FILE' in os.environ: + execfile(os.environ['BIJOE_SETTINGS_FILE']) diff --git a/bijoe/templates/bijoe/base.html b/bijoe/templates/bijoe/base.html new file mode 100644 index 0000000..111ab85 --- /dev/null +++ b/bijoe/templates/bijoe/base.html @@ -0,0 +1 @@ +{% extends "gadjo/base.html" %} diff --git a/bijoe/templates/bijoe/cube.html b/bijoe/templates/bijoe/cube.html new file mode 100644 index 0000000..34951a7 --- /dev/null +++ b/bijoe/templates/bijoe/cube.html @@ -0,0 +1,183 @@ +{% extends "bijoe/base.html" %} +{% load i18n staticfiles %} + +{% block extrascripts %} +{{ form.media.css }} + + +{% endblock %} + +{% block breadcrumb %} + +{% url 'homepage' as homepage_url %} +{% trans "Homepage" %} + +{% url 'warehouse' warehouse=warehouse.name as warehouse_url %} +{{ warehouse.label }} + +{{ cube.label }} +{% endblock %} + +{% block content %} +
+ {% csrf_token %} + +

Mesure(s)

+ {% with field=form.measures %} +

+ {{ field }} + {% if field.help_text %} + {{ field.help_text }} + {% endif %} +

+ {% endwith %} +

Regroupement(s)

+ {% with field=form.drilldown %} +

+ {{ field }} + {% if field.help_text %} + {{ field.help_text }} + {% endif %} +

+ {% endwith %} +

Filtre(s)

+ {{ field.label_tag }} + {% if field.errors %} + + {% endif %} + {{ field }} + {% if field.help_text %} + {{ field.help_text }} + {% endif %} +

+ {% endif %} + {% endfor %} +
+ {% if data %} + + + {% for dimension in drilldown %} + + {% endfor %} + {% for measure in measures %} + + {% endfor %} + + + {% for row in data %} + + {% for cell in row %} + + {% endfor %} + + {% endfor %} + +
{{ dimension.label.capitalize }}{{ measure.label.capitalize }}
{% if cell == None %}{% trans "None" %}{% else %}{{ cell }}{% endif %}
+ + {% else %} +
Veuillez choisir des mesures et des regroupements
+ {% endif %} +{% endblock %} + +{% block page-end %} +{{ form.media.js }} + +{% endblock %} diff --git a/bijoe/templates/bijoe/homepage.html b/bijoe/templates/bijoe/homepage.html new file mode 100644 index 0000000..e76f709 --- /dev/null +++ b/bijoe/templates/bijoe/homepage.html @@ -0,0 +1,16 @@ +{% extends "bijoe/base.html" %} +{% load i18n %} + +{% block breadcrumb %} +{% url 'homepage' as homepage_url %} +{% trans "Homepage" %} +{% endblock %} + +{% block content %} + +{% endblock %} diff --git a/bijoe/templates/bijoe/warehouse.html b/bijoe/templates/bijoe/warehouse.html new file mode 100644 index 0000000..73b26dd --- /dev/null +++ b/bijoe/templates/bijoe/warehouse.html @@ -0,0 +1,17 @@ +{% extends "bijoe/base.html" %} +{% load i18n %} + +{% block breadcrumb %} +{% url 'homepage' as homepage_url %} +{% trans "Homepage" %} +{{ warehouse.label }} +{% endblock %} + +{% block content %} + +{% endblock %} diff --git a/bijoe/urls.py b/bijoe/urls.py new file mode 100644 index 0000000..1e8015c --- /dev/null +++ b/bijoe/urls.py @@ -0,0 +1,12 @@ +from django.conf.urls import patterns, include, url +from django.contrib import admin + +import views + +urlpatterns = patterns( + '', + url(r'^$', views.HomepageView.as_view(), name='homepage'), + url(r'^(?P[^/]*)/$', views.WarehouseView.as_view(), name='warehouse'), + url(r'^(?P[^/]*)/(?P[^/]*)/$', views.CubeView.as_view(), name='cube'), + url(r'^admin/', include(admin.site.urls)), +) diff --git a/bijoe/utils.py b/bijoe/utils.py new file mode 100644 index 0000000..85ec596 --- /dev/null +++ b/bijoe/utils.py @@ -0,0 +1,14 @@ +import glob +import json + +from django.conf import settings + +from .schemas import Warehouse + + +def get_warehouses(): + warehouses = [] + for pattern in settings.BIJOE_SCHEMAS: + for path in glob.glob(pattern): + warehouses.append(Warehouse.from_json(json.load(open(path)))) + return warehouses diff --git a/bijoe/views.py b/bijoe/views.py new file mode 100644 index 0000000..bdd0d4d --- /dev/null +++ 
b/bijoe/views.py @@ -0,0 +1,134 @@ +import json +import datetime + +from django.views.generic import TemplateView, FormView +from django.http import Http404 +from django.utils import formats +from django.http import HttpResponse + +from .utils import get_warehouses +from .engine import Engine +from .forms import CubeForm +from .ods import Workbook + + +class HomepageView(TemplateView): + template_name = 'bijoe/homepage.html' + + def get_context_data(self, **kwargs): + ctx = super(HomepageView, self).get_context_data(**kwargs) + ctx['warehouses'] = get_warehouses() + return ctx + + +class WarehouseView(TemplateView): + template_name = 'bijoe/warehouse.html' + + def get_context_data(self, **kwargs): + ctx = super(WarehouseView, self).get_context_data(**kwargs) + try: + ctx['warehouse'] = [warehouse for warehouse in get_warehouses() if warehouse.name == + self.kwargs['warehouse']][0] + except IndexError: + raise Http404 + return ctx + + +class CubeMixin(object): + def get_data(self, cleaned_data, stringify=True): + cleaned_data = cleaned_data + filters = [] + for kw, values in cleaned_data.iteritems(): + if values and kw.startswith('filter__'): + dimension_name = kw[8:] + filters.append((dimension_name, values)) + measures = cleaned_data.get('measures', []) + drilldown = cleaned_data.get('drilldown', []) + data = [] + for row in self.cube.query(filters, drilldown, measures): + data_row = [] + for cell, value in row: + if stringify: + if cell.type is float: + value = formats.number_format(value, use_l10n=True) + u' %' + if isinstance(value, datetime.timedelta): + s = '' + if value.days: + s += '%d jour(s)' % value.days + if value.seconds / 3600: + s += ' %d heure(s)' % (value.seconds / 3600) + if not s: + s = 'moins d\'1 heure' + value = s + data_row.append(value) + data.append(data_row) + return data + + +class CubeView(CubeMixin, FormView): + template_name = 'bijoe/cube.html' + form_class = CubeForm + + def dispatch(self, request, *args, **kwargs): + try: + 
self.warehouse = Engine([warehouse for warehouse in get_warehouses() if warehouse.name + == self.kwargs['warehouse']][0]) + except IndexError: + raise Http404 + try: + self.cube = self.warehouse[self.kwargs['cube']] + except KeyError: + raise Http404 + return super(CubeView, self).dispatch(request, *args, **kwargs) + + def get_form_kwargs(self): + kwargs = super(CubeView, self).get_form_kwargs() + kwargs['cube'] = self.cube + return kwargs + + def form_valid(self, form): + if 'ods' in self.request.POST: + return self.ods(form) + else: + return self.form_invalid(form) + + def ods(self, form): + workbook = Workbook() + sheet = workbook.add_sheet(self.cube.label) + ctx = self.get_context_data(form=form) + for j, m in enumerate(ctx['drilldown'] + ctx['measures']): + sheet.write(0, j, m.label) + for i, row in enumerate(ctx['data']): + for j, cell in enumerate(row): + sheet.write(i + 1, j, unicode(cell)) + response = HttpResponse(content_type='application/vnd.oasis.opendocument.spreadsheet') + response['Content-Disposition'] = 'attachment; filename=%s.ods' % self.cube.name + workbook.save(response) + return response + + def get_context_data(self, **kwargs): + ctx = super(CubeView, self).get_context_data(**kwargs) + ctx['warehouse'] = self.warehouse + ctx['cube'] = self.cube + + form = ctx['form'] + if form.is_valid(): + ctx['data'] = self.get_data(form.cleaned_data) + ctx['measures'] = [self.cube.measures[measure] for measure in + form.cleaned_data['measures']] + ctx['drilldown'] = [self.cube.dimensions[dimension] for dimension in + form.cleaned_data['drilldown']] + json_data = [] + for row in self.get_data(form.cleaned_data, stringify=False): + coords = [] + for dimension, cell in zip(ctx['drilldown'], row): + coords.append({'label': dimension.label, 'value': cell}) + measures = [] + for measure, cell in zip(ctx['measures'], row[len(ctx['drilldown']):]): + if isinstance(cell, datetime.timedelta): + cell = cell.days + cell.seconds / 86400. 
+ measures.append({'label': measure.label, 'value': cell}) + json_data.append({'coords': coords, 'measures': measures}) + + ctx['json'] = json.dumps(json_data, indent=2) + return ctx diff --git a/bijoe/wsgi.py b/bijoe/wsgi.py new file mode 100644 index 0000000..907103b --- /dev/null +++ b/bijoe/wsgi.py @@ -0,0 +1,14 @@ +""" +WSGI config for bijoe project. + +It exposes the WSGI callable as a module-level variable named ``application``. + +For more information on this file, see +https://docs.djangoproject.com/en/1.7/howto/deployment/wsgi/ +""" + +import os +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "bijoe.settings") + +from django.core.wsgi import get_wsgi_application +application = get_wsgi_application() diff --git a/setup.py b/setup.py index 3229d20..e6c906e 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ def get_version(): return '0.0.0' -setup(name="wcs-olap", +setup(name="publik-bi", version=get_version(), license="AGPLv3+", description="Export w.c.s. data to an OLAP cube", @@ -54,8 +54,10 @@ setup(name="wcs-olap", maintainer_email="bdauvergne@entrouvert.com", packages=find_packages(), include_package_data=True, - install_requires=['requests','psycopg2', 'isodate'], + install_requires=['requests', 'django', 'psycopg2', 'isodate', 'Django-Select2', + 'XStatic-ChartNew.js'], entry_points={ 'console_scripts': ['wcs-olap=wcs_olap.cmd:main'], }, + scripts=['bijoe-ctl'], cmdclass={'sdist': eo_sdist}) diff --git a/tests/test_bijoe_schemas.py b/tests/test_bijoe_schemas.py new file mode 100644 index 0000000..9b426d5 --- /dev/null +++ b/tests/test_bijoe_schemas.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +from bijoe.schemas import Warehouse + + +def test_simple_parsing(): + Warehouse.from_json({ + 'name': 'coin', + 'label': 'coin', + 'pg_dsn': 'dbname=zozo', + 'search_path': ['cam', 'public'], + 'cubes': [ + { + 'name': 'all_formdata', + 'label': 'Tous les formulaires', + 'fact_table': 'formdata', + 'key': 'id', + 'joins': [ + { + 'name': 'formdef', + 
'master': '{fact_table}.formdef_id', + 'table': 'formdef', + 'detail': 'formdef.id', + } + ], + 'dimensions': [ + { + 'label': 'formulaire', + 'name': 'formdef', + 'type': 'integer', + 'join': ['formdef'], + 'value': 'formdef.id', + 'value_label': 'formdef.label', + 'order_by': 'formdef.label' + }, + { + 'name': 'receipt_time', + 'label': 'date de soumission', + 'join': ['receipt_time'], + 'type': 'date', + 'value': 'receipt_time.date' + } + ], + 'measures': [ + { + 'type': 'integer', + 'label': 'Nombre de demandes', + 'expression': 'count({fact_table}.id)', + 'name': 'count' + }, + { + 'type': 'integer', + 'label': u'Délai de traitement', + 'expression': 'avg((to_char(endpoint_delay, \'9999.999\') || \' days\')::interval)', + 'name': 'avg_endpoint_delay' + }, + { + 'type': 'percent', + 'label': 'Pourcentage', + 'expression': 'count({fact_table}.id) * 100. / (select count({fact_table}.id) from {table_expression} where {where_conditions})', + 'name': 'percentage' + } + ] + } + ], + }) diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..9e61007 --- /dev/null +++ b/tox.ini @@ -0,0 +1,19 @@ +# Tox (http://tox.testrun.org/) is a tool for running tests +# in multiple virtualenvs. This configuration file will run the +# test suite on all supported python versions. To use it, "pip install tox" +# and then run "tox" from this directory. 
+ +[tox] +toxworkdir = {env:TMPDIR:/tmp}/tox-{env:USER}/publik-bi/ + +[testenv] +usedevelop = true +setenv = + coverage: COVERAGE=--junit-xml=junit.xml --cov=src --cov-report xml +deps = + coverage + pytest + pytest-cov + pytest-random +commands = + py.test {env:COVERAGE:} {posargs:--random tests} diff --git a/wcs_olap/cmd.py b/wcs_olap/cmd.py index 4aadf66..97985dc 100644 --- a/wcs_olap/cmd.py +++ b/wcs_olap/cmd.py @@ -43,10 +43,13 @@ def main2(): 'postgresql DB', add_help=False) parser.add_argument('config_path', nargs='?', default=None) group = parser.add_mutually_exclusive_group() + parser.add_argument('--no-feed', dest='feed', help='only produce the model', + action='store_false', default=True) group.add_argument("-a", "--all", help="synchronize all wcs", action='store_true', default=False) group.add_argument('--url', help='url of the w.c.s. instance', required=False, default=None) args, rest = parser.parse_known_args() + feed = args.feed config = get_config(path=args.config_path) # list all known urls urls = [url for url in config.sections() if url.startswith('http://') or @@ -90,7 +93,7 @@ def main2(): verify=defaults.get('verify', 'True') == 'True') logger.info('starting synchronizing w.c.s. 
at %r with PostgreSQL at %s', url, pg_dsn) feeder = WcsOlapFeeder(api=api, schema=schema, pg_dsn=pg_dsn, logger=logger, - config=defaults) + config=defaults, do_feed=feed) feeder.feed() logger.info('finished') defaults = {} diff --git a/wcs_olap/feeder.py b/wcs_olap/feeder.py index 836f56d..85ed77e 100644 --- a/wcs_olap/feeder.py +++ b/wcs_olap/feeder.py @@ -43,7 +43,7 @@ class Context(object): class WcsOlapFeeder(object): - def __init__(self, api, pg_dsn, schema, logger=None, config=None): + def __init__(self, api, pg_dsn, schema, logger=None, config=None, do_feed=True): self.api = api self.logger = logger or Whatever() self.schema = schema @@ -53,6 +53,7 @@ class WcsOlapFeeder(object): self.formdefs = api.formdefs self.roles = api.roles self.categories = api.categories + self.do_feed = do_feed self.ctx = Context() self.ctx.push({ 'schema': self.schema, @@ -72,275 +73,139 @@ class WcsOlapFeeder(object): self.model = { 'label': self.config.get('cubes_label', schema), 'name': schema, - 'browser_options': { - 'schema': schema, - }, + 'search_path': [schema, 'public'], + 'pg_dsn': pg_dsn, + 'cubes': [], + } + cube = { + 'name': 'all_formdata', + 'label': u'Tous les formulaires', + 'fact_table': 'formdata', + 'key': 'id', 'joins': [ { 'name': 'receipt_time', + 'table': 'dates', + 'detail': 'date', 'master': 'receipt_time', - 'detail': { - 'table': 'dates', - 'column': 'date', - 'schema': 'public', - }, - 'method': 'detail', - 'alias': 'dates', }, { 'name': 'channel', + 'table': 'channel', 'master': 'channel_id', - 'detail': '{channel_table}.id', - 'method': 'detail', - }, - { - 'name': 'role', - 'detail': '{role_table}.id', - 'method': 'detail', + 'detail': 'id', }, { 'name': 'formdef', + 'table': 'formdef', 'master': 'formdef_id', - 'detail': '{form_table}.id', - 'method': 'detail', + 'detail': 'id', }, { 'name': 'category', - 'master': '{form_table}.category_id', - 'detail': '{category_table}.id', + 'table': 'category', + 'master': 'formdef.category_id', + 'detail': 
'id', + 'kind': 'left', }, { 'name': 'hour', + 'table': 'hour', 'master': 'hour_id', - 'detail': '{hour_table}.id', - 'method': 'detail', + 'detail': 'id', }, { 'name': 'generic_status', + 'table': 'status', 'master': 'generic_status_id', - 'detail': '{generic_status_table}.id', - 'method': 'detail', + 'detail': 'id', }, ], 'dimensions': [ { - 'label': 'date de soumission', 'name': 'receipt_time', - 'role': 'time', - 'levels': [ - { - 'name': 'year', - 'label': 'année', - 'role': 'year', - 'order_attribute': 'year', - 'order': 'asc', - }, - { - 'name': 'quarter', - 'order_attribute': 'quarter', - 'label': 'trimestre', - 'role': 'quarter', - }, - { - 'name': 'month', - 'label': 'mois', - 'role': 'month', - 'attributes': ['month', 'month_name'], - 'order_attribute': 'month', - 'label_attribute': 'month_name', - 'order': 'asc', - }, - { - 'name': 'week', - 'label': 'semaine', - 'role': 'week', - }, - { - 'name': 'day', - 'label': 'jour', - 'role': 'day', - 'order': 'asc', - }, - { - 'name': 'dow', - 'label': 'jour de la semaine', - 'attributes': ['dow', 'dow_name'], - 'order_attribute': 'dow', - 'label_attribute': 'dow_name', - 'order': 'asc', - }, - ], - 'hierarchies': [ - { - 'name': 'default', - 'label': 'par défaut', - 'levels': ['year', 'month', 'day'] - }, - { - 'name': 'quarterly', - 'label': 'par trimestre', - 'levels': ['year', 'quarter'] - }, - { - 'name': 'weekly', - 'label': 'par semaine', - 'levels': ['year', 'week'] - }, - { - 'name': 'dowly', - 'label': 'par jour de la semaine', - 'levels': ['dow'] - }, - ] + 'label': 'date de la demande', + 'join': ['receipt_time'], + 'type': 'date', + 'value': 'receipt_time.date', }, { - 'label': 'canaux', - 'name': 'channels', + 'name': 'channel', + 'label': 'canal', + 'join': ['channel'], + 'type': 'integer', + 'value': 'channel.id', + 'value_label': 'channel.label', }, { - 'label': 'catégories', - 'name': 'categories', + 'name': 'category', + 'label': 'catégorie', + 'join': ['formdef', 'category'], + 'type': 
'integer', + 'value': 'category.id', + 'value_label': 'category.label', }, { - 'label': 'formulaire', 'name': 'formdef', + 'label': 'formulaire', + 'join': ['formdef'], + 'type': 'integer', + 'value': 'formdef.id', + 'value_label': 'formdef.label', }, { - 'label': 'statuts génériques', - 'name': 'generic_statuses', + 'name': 'generic_status', + 'label': 'statut générique', + 'join': ['generic_status'], + 'type': 'integer', + 'value': 'generic_status.id', + 'value_label': 'generic_status.label', }, { + 'name': 'hour', 'label': 'heure', - 'name': 'hours', - 'levels': [ - { - 'name': 'hours', - 'attributes': ['hour_id', 'hour_label'], - 'order_attribute': 'hour_id', - 'label_attribute': 'hour_label', - } - ] - }, + 'join': ['hour'], + 'type': 'integer', + 'value': 'hour.id', + 'filter': False, + } ], - 'mappings': { - 'receipt_time.year': { - 'table': 'dates', - 'column': 'date', - 'schema': 'public', - 'extract': 'year', - }, - 'receipt_time.month': { - 'table': 'dates', - 'column': 'date', - 'schema': 'public', - 'extract': 'month' - }, - 'receipt_time.month_name': { - 'table': 'dates', - 'schema': 'public', - 'column': 'month' - }, - 'receipt_time.week': { - 'table': 'dates', - 'column': 'date', - 'schema': 'public', - 'extract': 'week' - }, - 'receipt_time.day': { - 'table': 'dates', - 'column': 'date', - 'schema': 'public', - 'extract': 'day' - }, - 'receipt_time.dow': { - 'table': 'dates', - 'column': 'date', - 'schema': 'public', - 'extract': 'dow' - }, - 'receipt_time.dow_name': { - 'table': 'dates', - 'schema': 'public', - 'column': 'day', - }, - 'receipt_time.quarter': { - 'table': 'dates', - 'column': 'date', - 'schema': 'public', - 'extract': 'quarter' - }, - 'formdef': 'formdef.label', - 'channels': 'channel.label', - 'categories': 'category.label', - 'generic_statuses': 'status.label', - 'hours.hour_label': '{hour_table}.label', - 'hours.hour_id': '{hour_table}.id', - }, - 'cubes': [ + 'measures': [ { - 'name': schema + '_formdata', - 'label': 'Toutes 
les demandes (%s)' % schema, - 'key': 'id', - 'fact': 'formdata', - 'dimensions': [ - 'receipt_time', - 'hours', - 'channels', - 'categories', - 'formdef', - 'generic_statuses', - ], - 'joins': [ - { - 'name': 'receipt_time', - }, - { - 'name': 'hour', - }, - { - 'name': 'channel', - }, - { - 'name': 'formdef', - }, - { - 'name': 'category', - }, - { - 'name': 'generic_status', - }, - ], - 'measures': [ - { - 'name': 'endpoint_delay', - 'label': 'délai de traitement', - 'nonadditive': 'all', - }, - ], - 'aggregates': [ - { - 'name': 'record_count', - 'label': 'nombre de demandes', - 'function': 'count' - }, - { - 'name': 'endpoint_delay_max', - 'label': 'délai de traitement maximum', - 'measure': 'endpoint_delay', - 'function': 'max', - }, - { - 'name': 'endpoint_delay_avg', - 'label': 'délai de traitement moyen', - 'measure': 'endpoint_delay', - 'function': 'avg', - }, - ], + 'name': 'count', + 'label': 'nombre de demandes', + 'type': 'integer', + 'expression': 'count({fact_table}.id)', }, - ], + { + 'name': 'avg_endpoint_delay', + 'label': 'délai de traitement moyen', + 'type': 'duration', + 'expression': 'avg(endpoint_delay)', + }, + { + 'name': 'max_endpoint_delay', + 'label': 'délai de traitement maximum', + 'type': 'duration', + 'expression': 'max(endpoint_delay)', + }, + { + 'name': 'min_endpoint_delay', + 'label': 'délai de traitement minimum', + 'type': 'duration', + 'expression': 'min(endpoint_delay)', + }, + { + 'name': 'percent', + 'label': 'Pourcentage des demandes', + 'type': 'percent', + "expression": 'count({fact_table}.id) * 100. 
+ ' + '/ (select count({fact_table}.id) from {table_expression} ' + 'where {where_conditions})', + } + ] } - # apply table names - self.model = self.tpl(self.model) + self.model['cubes'].append(cube) self.base_cube = self.model['cubes'][0] def hash_table_name(self, table_name): @@ -473,39 +338,32 @@ class WcsOlapFeeder(object): ['id', 'serial primary key'], ['formdef_id', 'smallint REFERENCES {form_table} (id)'], ['receipt_time', 'date'], - ['year_id', 'smallint REFERENCES {year_table} (id)'], - ['month_id', 'smallint REFERENCES {month_table} (id)'], ['hour_id', 'smallint REFERENCES {hour_table} (id)'], - ['day_id', 'smallint REFERENCES {day_table} (id)'], - ['dow_id', 'smallint REFERENCES {dow_table} (id)'], ['channel_id', 'smallint REFERENCES {channel_table} (id)'], ['backoffice', 'boolean'], ['generic_status_id', 'smallint REFERENCES {generic_status_table} (id)'], - ['endpoint_delay', 'real'], + ['endpoint_delay', 'interval'], ] self.comments = { - 'formdef_id': u'dim$formulaire', - 'receipt_time': u'time$date de réception', - 'year_id': u'dim$année', - 'month_id': u'dim$mois', - 'hour_id': u'dim$heure', - 'day_id': u'dim$jour', - 'dow_id': u'dim$jour de la semaine', - 'channel_id': u'dim$canal', - 'backoffice': u'dim$soumission backoffce', - 'generic_status_id': u'dim$statut générique', - 'endpoint_delay': u'measure$délai de traitement', + 'formdef_id': u'formulaire', + 'receipt_time': u'date de réception', + 'hour_id': u'heure', + 'channel_id': u'canal', + 'backoffice': u'soumission backoffice', + 'generic_status_id': u'statut générique', + 'endpoint_delay': u'délai de traitement', } self.create_table('{generic_formdata_table}', self.columns) for at, comment in self.comments.iteritems(): self.ex('COMMENT ON COLUMN {generic_formdata_table}.%s IS %%s' % at, vars=(comment,)) def feed(self): - self.do_schema() - self.do_base_table() + if self.do_feed: + self.do_schema() + self.do_base_table() for formdef in self.formdefs: try: - formdef_feeder = 
WcsFormdefFeeder(self, formdef) + formdef_feeder = WcsFormdefFeeder(self, formdef, do_feed=self.do_feed) formdef_feeder.feed() except WcsApiError, e: # ignore authorization errors @@ -513,14 +371,19 @@ class WcsOlapFeeder(object): and e.args[2].response.status_code == 403): continue self.logger.error('failed to retrieve formdef %s', formdef.slug) + if 'cubes_model_dirs' in self.config: + model_path = os.path.join(self.config['cubes_model_dirs'], '%s.model' % self.schema) + with open(model_path, 'w') as f: + json.dump(self.model, f, indent=2, sort_keys=True) class WcsFormdefFeeder(object): - def __init__(self, olap_feeder, formdef): + def __init__(self, olap_feeder, formdef, do_feed=True): self.olap_feeder = olap_feeder self.formdef = formdef self.status_mapping = {} self.items_mappings = {} + self.do_feed = do_feed self.fields = [] @property @@ -614,25 +477,16 @@ class WcsFormdefFeeder(object): continue status = data.formdef.schema.workflow.statuses_map[data.workflow.status.id] - if data.endpoint_delay: - endpoint_delay = (data.endpoint_delay.days + float(data.endpoint_delay.seconds) / - 86400.) 
- else: - endpoint_delay = None row = { 'formdef_id': self.formdef_sql_id, 'receipt_time': data.receipt_time, - 'year_id': data.receipt_time.year, - 'month_id': data.receipt_time.month, - 'day_id': data.receipt_time.day, 'hour_id': data.receipt_time.hour, - 'dow_id': data.receipt_time.weekday(), 'channel_id': self.channel_to_id[data.submission.channel.lower()], 'backoffice': data.submission.backoffice, # FIXME "En cours"/2 is never used 'generic_status_id': 3 if status.endpoint else 1, 'status_id': self.status_mapping[data.workflow.status.id], - 'endpoint_delay': endpoint_delay, + 'endpoint_delay': data.endpoint_delay, } # add form fields value for field in self.fields: @@ -673,78 +527,78 @@ class WcsFormdefFeeder(object): }) # create cube - self.cube = copy.deepcopy(self.base_cube) - self.cube.update({ - 'name': self.schema + '_' + self.table_name, + cube = self.cube = copy.deepcopy(self.base_cube) + cube.update({ + 'name': self.table_name, 'label': self.formdef.schema.name, - 'fact': self.table_name, + 'fact_table': self.table_name, + 'key': 'id', }) + cube['dimensions'] = [dimension for dimension in cube['dimensions'] + if dimension['name'] not in ('category', 'formdef')] + # add dimension for status - self.cube['joins'].append({ + cube['joins'].append({ + 'name': 'status', + 'table': self.status_table_name, 'master': 'status_id', - 'detail': '%s.id' % self.status_table_name, - 'method': 'detail', + 'detail': 'id', }) - dim_name = '%s_%s' % (self.table_name, 'status') - self.model['dimensions'].append({ - 'name': dim_name, + cube['dimensions'].append({ + 'name': 'status', 'label': 'statut', - 'levels': [ - { - 'name': 'status', - 'attributes': ['status_id', 'status_label'], - 'order_attribute': 'status_id', - 'label_attribute': 'status_label', - }, - ], + 'join': ['status'], + 'type': 'integer', + 'value': 'status.id', + 'value_label': 'status.label', }) - self.model['mappings']['%s.status_id' % dim_name] = '%s.id' % self.status_table_name - 
self.model['mappings']['%s.status_label' % dim_name] = '%s.label' % self.status_table_name - self.cube['dimensions'].append(dim_name) # add dimension for function for function, name in self.formdef.schema.workflow.functions.iteritems(): at = 'function_%s' % slugify(function) - dim_name = '%s_function_%s' % (self.table_name, slugify(function)) - self.cube['joins'].append({ + cube['joins'].append({ + 'name': at, + 'table': 'role', 'master': at, - 'detail': self.tpl('{role_table}.id'), - 'alias': at, + 'detail': 'id', }) - self.model['dimensions'].append({ - 'name': dim_name, + cube['dimensions'].append({ + 'name': at, 'label': u'fonction %s' % name, + 'join': [at], + 'type': 'integer', + 'value': '%s.id' % at, + 'value_label': '%s.label' % at, + 'filter': False, }) - self.model['mappings'][dim_name] = '%s.label' % at - self.cube['dimensions'].append(dim_name) # add dimensions for item fields for field in self.fields: if field.type != 'item': continue table_name = self.hash_table_name('{formdata_table}_field_%s' % field.varname) - self.cube['joins'].append({ + cube['joins'].append({ + 'name': field.varname, + 'table': table_name, 'master': 'field_%s' % field.varname, - 'detail': '%s.id' % table_name, - 'method': 'detail', + 'detail': 'id', }) - dim_name = '%s_%s' % (self.table_name. 
field.varname) - self.model['dimensions'].append({ - 'name': dim_name, + cube['dimensions'].append({ + 'name': field.varname, 'label': field.label, + 'join': [field.varname], + 'type': 'integer', + 'value': '%s.id' % field.varname, + 'value_label': '%s.label' % field.varname, + 'filter': field.in_filters, }) - self.model['mappings'][dim_name] = '%s.label' % table_name - self.cube['dimensions'].append(dim_name) - self.model['cubes'].append(self.cube) - try: - self.logger.info('feed formdef %s', self.formdef.slug) - self.do_statuses() - self.do_data_table() - self.do_data() - finally: - self.olap_feeder.ctx.pop() - if 'cubes_model_dirs' in self.config: - model_path = os.path.join(self.config['cubes_model_dirs'], '%s.json' % self.schema) - with open(model_path, 'w') as f: - json.dump(self.model, f, indent=2, sort_keys=True) + self.model['cubes'].append(cube) + if self.do_feed: + try: + self.logger.info('feed formdef %s', self.formdef.slug) + self.do_statuses() + self.do_data_table() + self.do_data() + finally: + self.olap_feeder.ctx.pop()