summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--MANIFEST.in3
-rw-r--r--README.rst248
-rwxr-xr-xbijoe-ctl10
-rw-r--r--bijoe/__init__.py0
-rw-r--r--bijoe/engine.py216
-rw-r--r--bijoe/forms.py71
-rw-r--r--bijoe/schemas.py297
-rw-r--r--bijoe/settings.py123
-rw-r--r--bijoe/templates/bijoe/base.html1
-rw-r--r--bijoe/templates/bijoe/cube.html183
-rw-r--r--bijoe/templates/bijoe/homepage.html16
-rw-r--r--bijoe/templates/bijoe/warehouse.html17
-rw-r--r--bijoe/urls.py12
-rw-r--r--bijoe/utils.py14
-rw-r--r--bijoe/views.py134
-rw-r--r--bijoe/wsgi.py14
-rw-r--r--setup.py6
-rw-r--r--tests/test_bijoe_schemas.py65
-rw-r--r--tox.ini19
-rw-r--r--wcs_olap/cmd.py5
-rw-r--r--wcs_olap/feeder.py460
21 files changed, 1599 insertions, 315 deletions
diff --git a/MANIFEST.in b/MANIFEST.in
index 0d8124e..096159e 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,2 +1,5 @@
include VERSION
+include tox.ini
+recursive-include tests *.py
include create_dates.sql
+recursive-include bijoe/templates *.html
diff --git a/README.rst b/README.rst
index 32ad7a9..c9c2381 100644
--- a/README.rst
+++ b/README.rst
@@ -2,22 +2,252 @@ BI for Publik
=============
w.c.s. OLAP
------------
+~~~~~~~~~~~
Tool to export w.c.s. data in a database with star schema for making an OLAP
-cube.::
+cube.
- usage: wcs-olap --url URL [-h] --orig ORIG --key KEY
- --pg-dsn PG_DSN
+::
+
+ usage: wcs-olap [--no-feed] [-a | --url URL] [-h] [--orig ORIG] [--key KEY]
+ [--pg-dsn PG_DSN] [--schema SCHEMA]
+ [config_path]
Export W.C.S. data as a star schema in a postgresql DB
+ positional arguments:
+ config_path
+
optional arguments:
- --url URL url of the w.c.s. instance
- -h, --help show this help message and exit
- --orig ORIG origin of the request for signatures
- --key KEY HMAC key for signatures
- --pg-dsn PG_DSN Psycopg2 DB DSN
+ --no-feed only produce the model
+ -a, --all synchronize all wcs
+ --url URL url of the w.c.s. instance
+ -h, --help show this help message and exit
+ --orig ORIG origin of the request for signatures
+ --key KEY HMAC key for signatures
+ --pg-dsn PG_DSN Psycopg2 DB DSN
+ --schema SCHEMA schema name
+
+Bi-Joe
+~~~~~~
+
+BI Joe is a library and a Django application to simplify querying a postgresql
+database containing a star schema with BI annotations in order to
+easily produce BI dashboards
+
+It's inspired by the Cubes project.
+
+Feature
+~~~~~~~
+
+* use PostgreSQL database as datastore,
+* declare joins to define star schemas of your cubes
+* declare dimensions as SQL query or exepression defining, label, group by,
+ ordering or member list.
+* declare measure as SQL expression using aggregate functions
+
+Missing features
+~~~~~~~~~~~~~~~~
+
+* hierarchical dimensions
+* measure necessiting a rollup (percentage based on count() of facts at an upper
+ level)
+
+Model
+~~~~~
+
+You declare your model using JSON files, those JSON files are targeted by a list
+of glob patterns in the Djagno setting BIJOE_SCHEMAS.
+
+.. code:: python
+
+ BIJOE_SCHEMAS = ['/var/lib/bijoe/cubes/*.model']
+
+The JSON model files must conform to this schema:
+
+* name: technical identifier of the mode, a short string without space if
+ preferred
+
+* label: any string describing the model
+
+* pg_dsn: string describing the connection to PostgreSQL, as expected by
+ psycopg2, ex.: `"dbname=olap_db user=admin password=xxx"`
+
+* search_path: search path to set set if relations are not all in the public
+ schema,
+
+* cubes: the list of cube descriptors,
+
+ * name: technical identifier of the cube, same remark thant for models,
+
+ * label: as for model,
+
+ * fact_table: name of the table storing facts
+
+ * key: column of the table identifying individual facts
+
+ * joins: list of equality joins, all joins are RIGHT OUTER JOINS, table are
+ cross joined when drill involve dimensions using multiple joins.
+
+ * name: SQL identifier for naming the join,
+
+ * table: name of the relation being joined,
+
+ * master: table and column indicating the left part of the equality
+ condition for the join, you can use `mydim_id` to reference the fact
+ table or `otherjoin.mydim_id` to reference another join.
+
+ * detail: name of the column on the joined table for the equality condition,
+
+ * kind: type of join, must be `inner`, `left` or `right`, default is right.
+
+ * dimensions: list of dimension descriptors,
+
+ * name: technical identifier of the dimension, it will be used to name the
+ dimension in the API,
+
+ * label: human description for the dimension, used in UIs,
+
+ * join: list of join names, indicate that some joins must be used when using
+ this dimension,
+
+ * type: indicate the type of the dimension, numerical, time-like,
+ geographical, duration, etc..
+
+ * value: SQL expression giving the value for the dimension,
+ it can be different than the value used for filtering or grouping,
+ * sql_filter: SQL expression that will be used in the SQL template
+ `<sql_filter> IN (...)` when filtering along a dimension,
+
+ * value_label: SQL expression giving the shown value
+
+ * group_by: SQL expression to group facts along a dimension, default is to
+ use value
+
+ * order_by: SQL expression to order dimension values, default is to use
+ value
+
+ * measures: list of measure descriptors
+
+ * name: as for models,
+
+ * label: as for models,
+
+ * type: type of the measure: integer or duration,
+
+ * expression: SQL expression indicating how to compute the aggregate,
+ ex.: `avg(delay)`, `count(product_type)`.
+
+Example
++++++++
+
+.. code:: json
+
+ {
+ "name" : "cam",
+ "label" : "cam",
+ "pg_dsn" : "dbname=wcs-olap",
+ "search_path" : [
+ "cam",
+ "public"
+ ],
+ "cubes" : [
+ {
+ "name" : "all_formdata",
+ "label" : "Tous les formulaires",
+ "fact_table" : "formdata",
+ "key" : "id",
+ "joins" : [
+ {
+ "name" : "formdef",
+ "master" : "formdef_id",
+ "detail" : "id",
+ "table" : "formdef"
+ },
+ {
+ "name" : "dates",
+ "master" : "receipt_time",
+ "detail" : "date",
+ "table" : "dates"
+ }
+ ],
+ "dimensions" : [
+ {
+ "name" : "formdef",
+ "label" : "formulaire",
+ "type" : "integer",
+ "join" : ["formdef"],
+ "value" : "formdef.id",
+ "value_label" : "formdef.label"
+ },
+ {
+ "join": [
+ "receipt_time"
+ ],
+ "label": "date de la demande",
+ "name": "receipt_time",
+ "type": "date",
+ "value": "receipt_time.date"
+ },
+ ],
+ "measures" : [
+ {
+ "name": "count",
+ "label": "nombre de demandes",
+ "type": "integer",
+ "expression": "count({fact_table}.id)",
+ },
+ {
+ "name" : "avg_endpoint_delay",
+ "label" : "Délai de traitement",
+ "type" : "duration",
+ "expression" : "avg(endpoint_delay)"
+ }
+ ]
+ }
+ ]
+ }
+
+API
+~~~
+
+Model description is handled by `bijoe.schema` and model querying by
+`bijoe.engine`.
+
+bijoe.schema.Warehouse
+++++++++++++++++++++++
+
+`Warehouse` is the main class to manipulate models, it has two class methods:
+
+* `from_json(d)` which transform a dictionnary obtained by eventually parsing
+ some JSON file to a Warehouse object.
+
+* `to_json()` which transform a Warehouse object into a JSON compatible
+ dictionnary.
+
+bijoe.engine.Engine
++++++++++++++++++++
+
+`Engine(warehouse)` is the entry-point for querying a model, you get an
+`EngineCube` object by indexing the engine with the name of a cube.
+
+.. code:: python
+
+ cube = Engine(warehouse)['mycube']
+
+
+You can query your cube using the `query` method.
+
+.. code:: python
+ cube.query(filters=[('year', [2013, 2014, 2015])],
+ drilldown=['year', 'product'],
+ measures=['count', 'avg_sale'])
+It returns a sequence of rows whose elements are the values of the drilldown
+dimensions in the same order as in the query followed by the values of the
+measures also in the same ordre as the query.
+The `count` measure is a special measure which is always present whose
+expression is always `count({fact_table}.{key})` where `fact_table` and `key`
+are the corresponding attributes of the cube.
diff --git a/bijoe-ctl b/bijoe-ctl
new file mode 100755
index 0000000..65ea9ae
--- /dev/null
+++ b/bijoe-ctl
@@ -0,0 +1,10 @@
+#!/usr/bin/env python
+import os
+import sys
+
+if __name__ == "__main__":
+ os.environ.setdefault("DJANGO_SETTINGS_MODULE", "bijoe.settings")
+
+ from django.core.management import execute_from_command_line
+
+ execute_from_command_line(sys.argv)
diff --git a/bijoe/__init__.py b/bijoe/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/bijoe/__init__.py
diff --git a/bijoe/engine.py b/bijoe/engine.py
new file mode 100644
index 0000000..799bbbc
--- /dev/null
+++ b/bijoe/engine.py
@@ -0,0 +1,216 @@
+import logging
+
+import collections
+import psycopg2
+
+
+def to_tuple(cur, values):
+ return cur.mogrify(', '.join(['%s'] * len(values)), values)
+
+
+Member = collections.namedtuple('Member', ['id', 'label'])
+
+
+class EngineDimension(object):
+ def __init__(self, engine, engine_cube, dimension):
+ self.engine = engine
+ self.engine_cube = engine_cube
+ self.dimension = dimension
+
+ def __getattr__(self, name):
+ return getattr(self.dimension, name)
+
+ @property
+ def members(self):
+ assert self.dimension.type != 'date'
+ cursor = self.engine.get_cursor()
+ if self.dimension.join:
+ join = self.engine_cube.get_join(self.dimension.join[-1])
+ sql = 'SELECT %s AS value, %s::text AS label FROM %s AS %s ORDER BY %s' % (
+ self.value, self.value_label or self.value, join.table, join.name, self.order_by or
+ self.value)
+ else:
+ sql = 'SELECT %s AS value, %s::text AS label FROM {fact_table} ORDER BY %s' % (
+ self.value, self.value_label or self.value, self.self.order_by or self.value)
+ sql = sql.format(fact_table=self.engine_cube.fact_table)
+ self.engine.log.debug('SQL: %s', sql)
+ cursor.execute(sql)
+ for row in cursor.fetchall():
+ yield Member(*row)
+
+
+class EngineMeasure(object):
+ def __init__(self, engine, engine_cube, measure):
+ self.engine = engine
+ self.engine_cube = engine_cube
+ self.measure = measure
+
+ def __getattr__(self, name):
+ return getattr(self.measure, name)
+
+
+class ProxyList(object):
+ def __init__(self, engine, engine_cube, attribute, cls):
+ self.engine = engine
+ self.engine_cube = engine_cube
+ self.attribute = attribute
+ self.cls = cls
+
+ def __iter__(self):
+ return (self.cls(self.engine, self.engine_cube, o)
+ for o in getattr(self.engine_cube.cube, self.attribute))
+
+ def __getitem__(self, name):
+ for o in getattr(self.engine_cube.cube, self.attribute):
+ if o.name == name:
+ return self.cls(self.engine, self.engine_cube, o)
+ raise KeyError
+
+
+class ProxyListDescriptor(object):
+ def __init__(self, attribute, cls):
+ self.attribute = attribute
+ self.cls = cls
+
+ def __get__(self, obj, t=None):
+ return ProxyList(obj.engine, obj, self.attribute, self.cls)
+
+
+class EngineCube(object):
+ dimensions = ProxyListDescriptor('all_dimensions', EngineDimension)
+ measures = ProxyListDescriptor('measures', EngineMeasure)
+
+ def __init__(self, warehouse, cube):
+ self.engine = warehouse
+ self.cube = cube
+
+ def __getattr__(self, name):
+ return getattr(self.cube, name)
+
+ def sql_query(self, filters, drilldown, measures, **kwargs):
+ cursor = self.engine.get_cursor()
+
+ projections = []
+ joins = set()
+ where = []
+ group_by = []
+ order_by = []
+
+ for dimension_name, values in filters:
+ dimension = self.cube.get_dimension(dimension_name)
+ assert dimension.filter
+ condition, values = dimension.filter(values)
+ condition = cursor.mogrify(condition, values)
+ where.append(condition)
+ joins.update(dimension.join)
+
+ for dimension_name in drilldown:
+ dimension = self.cube.get_dimension(dimension_name)
+ joins.update(dimension.join)
+ projections.append('%s AS %s' % (dimension.value_label or dimension.value,
+ dimension.name))
+ group_by.append(dimension.group_by or dimension.value)
+ order_by.append(dimension.order_by or dimension.value)
+
+ for measure_name in measures:
+ measure = self.cube.get_measure(measure_name)
+ if measure.expression not in projections:
+ projections.append(measure.expression + ' AS ' + measure.name)
+ sql = 'SELECT ' + ', '.join(projections)
+ table_expression = ' %s' % self.cube.fact_table
+ if joins:
+ join_tree = {}
+ for join_name in joins:
+ join = self.cube.get_join(join_name)
+ if '.' in join.master:
+ master_table = join.master.split('.', 1)[0]
+ else:
+ master_table = self.fact_table
+ join_tree.setdefault(master_table, {}).setdefault(join.kind, {})[join.name] = join
+
+ def build_table_expression(table_name, alias=None, top=True):
+ sql = table_name
+ if alias:
+ sql += ' AS %s' % alias
+ add_paren = False
+ for kind in ['left', 'inner', 'right']:
+ joins = join_tree.get(table_name, {}).get(kind)
+ if not joins:
+ continue
+ add_paren = True
+ join_kinds = {
+ 'inner': 'INNER JOIN',
+ 'left': 'LEFT OUTER JOIN',
+ 'right': 'RIGHT OUTER JOIN',
+ }
+ sql += ' %s ' % join_kinds[kind]
+ sub_joins = []
+ conditions = []
+ for join_name, join in joins.iteritems():
+ sub_joins.append(
+ build_table_expression(join.table, join.name, top=False))
+ conditions.append('%s.%s = %s.%s' % (alias or table_name,
+ join.master.split('.')[-1],
+ join.name, join.detail))
+ sub_sql = ' CROSS JOIN '.join(sub_joins)
+ if len(sub_joins) > 1:
+ sub_sql = '(%s)' % sub_sql
+ sql += sub_sql
+ sql += ' ON %s' % ' AND '.join(conditions)
+ if not top and add_paren:
+ sql = '(%s)' % sql
+ return sql
+
+ table_expression = build_table_expression(self.fact_table)
+ sql += ' FROM %s' % table_expression
+ where_conditions = 'true'
+ if where:
+ where_conditions = ' AND '.join(where)
+ sql += ' WHERE %s' % where_conditions
+ if group_by:
+ sql += ' GROUP BY %s' % ', '.join(group_by)
+ if order_by:
+ sql += ' ORDER BY %s' % ', '.join(order_by)
+ sql = sql.format(fact_table=self.cube.fact_table,
+ table_expression=table_expression,
+ where_conditions=where_conditions)
+ sql = sql.format(fact_table=self.cube.fact_table,
+ table_expression=table_expression,
+ where_conditions=where_conditions)
+ return sql
+
+ def query(self, filters, drilldown, measures, **kwargs):
+ self.engine.log.debug('%s.%s query filters=%s drilldown=%s measures=%s',
+ self.engine.warehouse.name, self.cube.name, filters, drilldown,
+ measures)
+ cells = []
+ for dimension_name in drilldown:
+ cells.append(self.dimensions[dimension_name])
+ for measure_name in measures:
+ cells.append(self.measures[measure_name])
+ cursor = self.engine.get_cursor()
+ sql = self.sql_query(filters=filters, drilldown=drilldown, measures=measures, **kwargs)
+ self.engine.log.debug('SQL: %s', sql)
+ cursor.execute(sql)
+ for row in cursor.fetchall():
+ yield zip(cells, row)
+
+
+class Engine(object):
+ def __init__(self, warehouse):
+ self.warehouse = warehouse
+ self.log = logging.getLogger(__name__)
+
+ def __getitem__(self, name):
+ return EngineCube(self, self.warehouse.get_cube(name))
+
+ def __getattr__(self, name):
+ return getattr(self.warehouse, name)
+
+ def get_cursor(self):
+ connection = psycopg2.connect(
+ self.warehouse.pg_dsn)
+ cursor = connection.cursor()
+ search_path = ', '.join(['"%s"' % namespace for namespace in self.warehouse.search_path])
+ cursor.execute('SET SEARCH_PATH = %s' % search_path)
+ return cursor
diff --git a/bijoe/forms.py b/bijoe/forms.py
new file mode 100644
index 0000000..3aada31
--- /dev/null
+++ b/bijoe/forms.py
@@ -0,0 +1,71 @@
+import datetime
+
+from django import forms
+from django.utils.translation import ugettext as _
+
+from django_select2.forms import Select2MultipleWidget
+
+
+class DateRangeWidget(forms.MultiWidget):
+ def __init__(self, attrs=None):
+ attrs2 = {'type': 'date'}
+ if attrs:
+ attrs2.update(attrs)
+ widgets = (
+ forms.DateInput(attrs=attrs2.copy()),
+ forms.DateInput(attrs=attrs2.copy()),
+ )
+ super(DateRangeWidget, self).__init__(widgets, attrs=attrs)
+
+ def decompress(self, value):
+ if not value:
+ return None, None
+ return value
+
+
+class DateRangeField(forms.MultiValueField):
+ widget = DateRangeWidget
+
+ def __init__(self, *args, **kwargs):
+ # Or define a different message for each field.
+ fields = (
+ forms.DateField(required=False),
+ forms.DateField(required=False),
+ )
+ super(DateRangeField, self).__init__(fields=fields, require_all_fields=False, *args,
+ **kwargs)
+
+ def compress(self, values):
+ return values
+
+
+class CubeForm(forms.Form):
+ def __init__(self, *args, **kwargs):
+ self.cube = cube = kwargs.pop('cube')
+ super(CubeForm, self).__init__(*args, **kwargs)
+
+ # filters
+ for dimension in cube.dimensions:
+ if not dimension.filter:
+ continue
+ field_name = 'filter__%s' % dimension.name
+ if dimension.type is datetime.date:
+ self.fields[field_name] = DateRangeField(
+ label=dimension.label.capitalize(), required=False)
+ else:
+ self.fields[field_name] = forms.MultipleChoiceField(
+ label=dimension.label.capitalize(),
+ choices=dimension.members,
+ required=False,
+ widget=Select2MultipleWidget())
+
+ # group by
+ choices = [(dimension.name, dimension.label) for dimension in cube.dimensions
+ if dimension.type not in (datetime.datetime, datetime.date)]
+ self.fields['drilldown'] = forms.MultipleChoiceField(
+ label=_('Group by'), choices=choices, required=False, widget=Select2MultipleWidget())
+
+ # measures
+ choices = [(measure.name, measure.label) for measure in cube.measures]
+ self.fields['measures'] = forms.MultipleChoiceField(
+ label=_('Measures'), choices=choices, widget=Select2MultipleWidget())
diff --git a/bijoe/schemas.py b/bijoe/schemas.py
new file mode 100644
index 0000000..3679a17
--- /dev/null
+++ b/bijoe/schemas.py
@@ -0,0 +1,297 @@
+# -*- coding: utf-8 -*-
+import datetime
+import decimal
+import collections
+
+
+TYPE_MAP = {
+ 'duration': datetime.timedelta,
+ 'date': datetime.date,
+ 'integer': int,
+ 'decimal': decimal.Decimal,
+ 'percent': float,
+}
+
+
+class SchemaError(Exception):
+ pass
+
+
+def type_to_json(t):
+ for k, v in TYPE_MAP.items():
+ if t is v:
+ return k
+
+
+def type_cast(t):
+ if isinstance(t, type):
+ return t
+ else:
+ return TYPE_MAP[t]
+type_cast.to_json = type_to_json
+
+
+class Base(object):
+ __types__ = {}
+
+ def __init__(self, **kwargs):
+ for k, v in kwargs.iteritems():
+ setattr(self, k, v)
+
+ @classmethod
+ def slots(cls):
+ s = set()
+ for cls in reversed(cls.__mro__):
+ if hasattr(cls, '__slots__'):
+ s.update(cls.__slots__)
+ return s
+
+ @classmethod
+ def types(cls):
+ d = {}
+ for cls in reversed(cls.__mro__):
+ if hasattr(cls, '__types__'):
+ d.update(cls.__types__)
+ return d
+
+ @classmethod
+ def from_json(cls, d):
+ assert hasattr(d, 'keys')
+ slots = cls.slots()
+ assert set(d.keys()) <= set(slots), \
+ 'given keys %r does not match %s.__slots__: %r' % (d.keys(), cls.__name__, slots)
+ types = cls.types()
+
+ kwargs = {}
+ for key in slots:
+ assert key in d or hasattr(cls, key), \
+ '%s.%s is is a mandatory attribute' % (cls.__name__, key)
+ if not key in d:
+ continue
+ value = d[key]
+ if key in types:
+ kls = types[key]
+ if isinstance(kls, list):
+ kls = kls[0]
+ if hasattr(kls, 'from_json'):
+ value = [kls.from_json(v) for v in value]
+ else:
+ value = [kls(v) for v in value]
+ elif hasattr(kls, 'from_json'):
+ value = kls.from_json(value)
+ else:
+ value = kls(value)
+ kwargs[key] = value
+ return cls(**kwargs)
+
+ def to_json(self):
+ d = {}
+ types = self.types()
+ for attr in self.slots():
+ try:
+ v = getattr(self, attr)
+ except AttributeError:
+ pass
+ else:
+ if attr in types and hasattr(types[attr], 'to_json'):
+ v = types[attr].to_json(v)
+ if isinstance(v, list):
+ v = [x.to_json() if hasattr(x, 'to_json') else x for x in v]
+ d[attr] = v
+ return d
+
+ def __repr__(self):
+ kwargs = ['%s=%r' % (key, getattr(self, key)) for key in self.slots() if hasattr(self, key)]
+ return '<%s %s>' % (self.__class__.__name__, ' '.join(kwargs))
+
+
+class Measure(Base):
+ __slots__ = ['name', 'label', 'type', 'expression']
+ __types__ = {
+ 'name': str,
+ 'label': unicode,
+ 'type': type_cast,
+ 'expression': str,
+ }
+
+
+class Dimension(Base):
+ __slots__ = ['name', 'label', 'type', 'join', 'value', 'value_label',
+ 'order_by', 'group_by', 'filter_in_join', 'filter']
+ __types__ = {
+ 'name': str,
+ 'label': unicode,
+ 'type': type_cast,
+ 'join': [str],
+ 'value': str,
+ 'value_label': str,
+ 'order_by': str,
+ 'group_by': str,
+ 'filter': bool,
+ }
+
+ label = None
+ value_label = None
+ order_by = None
+ group_by = None
+ join = None
+ filter = True
+
+ @property
+ def dimensions(self):
+ if self.type is datetime.date:
+ return [
+ self,
+ Dimension(
+ label=u'année (%s)' % self.label,
+
+ name=self.name + '__year',
+ type='integer',
+ join=self.join,
+ value='EXTRACT(year from %s)::integer' % self.value,
+ filter=False),
+ Dimension(
+ label=u'mois (%s)' % self.label,
+ name=self.name + '__month',
+ type='integer',
+ join=self.join,
+ value='EXTRACT(month from %s)' % self.value,
+ value_label='to_char(date_trunc(\'month\', %s), \'TMmonth\')' % self.value,
+ group_by='EXTRACT(month from %s), '
+ 'to_char(date_trunc(\'month\', %s), \'TMmonth\')'
+ % (self.value, self.value),
+ filter=False),
+ Dimension(
+ label=u'jour de la semaine (%s)' % self.label,
+ name=self.name + '__dow',
+ type='integer',
+ join=self.join,
+ value='EXTRACT(dow from %s)' % self.value,
+ value_label='to_char(date_trunc(\'week\', current_date)::date '
+ '+ EXTRACT(dow from %s)::integer, \'TMday\')' % self.value,
+ filter=False)
+ ]
+ return [self]
+
+ def filter(self, filter_values):
+ if self.type is datetime.date:
+ assert len(filter_values) == 2
+ filters = []
+ values = []
+ try:
+ if filter_values[0]:
+ filters.append('%s >= %%s' % self.value)
+ values.append(filter_values[0])
+ except IndexError:
+ pass
+ try:
+ if filter_values[1]:
+ filters.append('%s <= %%s' % self.value)
+ values.append(filter_values[1])
+ except IndexError:
+ pass
+ return ' AND '.join(filters), values
+ else:
+ if not filter_values:
+ return '', []
+ if self.type == 'integer':
+ values = map(int, filter_values)
+ else:
+ values = filter_values
+ s = ', '.join(['%s'] * len(values))
+ return '%s IN (%s)' % (self.value, s), values
+
+
+def join_kind(kind):
+ if kind not in ('inner', 'left', 'right'):
+ raise ValueError('bad joind kind: %s' % kind)
+ return kind
+
+
+class Join(Base):
+ __slots__ = ['name', 'table', 'master', 'detail', 'kind']
+ __types__ = {
+ 'name': str,
+ 'table': str,
+ 'master': str,
+ 'detail': str,
+ 'kind': join_kind,
+ }
+
+ kind = 'right'
+
+
+class Cube(Base):
+ __slots__ = ['name', 'label', 'fact_table', 'key', 'joins', 'dimensions', 'measures']
+ __types__ = {
+ 'name': str,
+ 'label': unicode,
+ 'fact_table': str,
+ 'key': str,
+ 'joins': [Join],
+ 'dimensions': [Dimension],
+ 'measures': [Measure],
+ }
+
+ joins = ()
+ dimensions = ()
+ measures = ()
+
+ def check(self):
+ names = collections.Counter()
+ names.update(join.name for join in self.joins)
+ names.update(dimension.name for dimension in self.dimensions)
+ names.update(measure.name for measure in self.measures)
+ duplicates = [k for k, v in names.iteritems() if v > 1]
+ if duplicates:
+ raise SchemaError(
+ 'More than one join, dimension or measure with name(s) %s' % ', '.join(duplicates))
+
+ @property
+ def all_dimensions(self):
+ for dimension in self.dimensions:
+ for sub_dimension in dimension.dimensions:
+ yield sub_dimension
+
+ def get_dimension(self, name):
+ for dimension in self.dimensions:
+ for sub_dimension in dimension.dimensions:
+ if sub_dimension.name == name:
+ return sub_dimension
+ raise KeyError
+
+ def get_join(self, name):
+ for join in self.joins:
+ if join.name == name:
+ return join
+ raise KeyError
+
+ def get_measure(self, name):
+ for measure in self.measures:
+ if measure.name == name:
+ return measure
+ raise KeyError
+
+
+class Warehouse(Base):
+ __slots__ = ['name', 'label', 'pg_dsn', 'search_path', 'cubes']
+ __types__ = {
+ 'name': str,
+ 'label': unicode,
+ 'pg_dsn': str,
+ 'search_path': [str],
+ 'cubes': [Cube],
+ 'search_path': [str],
+ }
+
+ def check(self):
+ names = collections.Counter(cube.name for cube in self.cubes)
+ duplicates = [k for k, v in names.iteritems() if v > 1]
+ if duplicates:
+ raise SchemaError('More than one cube with name(s) %s' % ', '.join(duplicates))
+
+ def get_cube(self, name):
+ for cube in self.cubes:
+ if cube.name == name:
+ return cube
+ raise KeyError
diff --git a/bijoe/settings.py b/bijoe/settings.py
new file mode 100644
index 0000000..8ac128e
--- /dev/null
+++ b/bijoe/settings.py
@@ -0,0 +1,123 @@
+"""
+Django settings for bijoe project.
+
+For more information on this file, see
+https://docs.djangoproject.com/en/1.7/topics/settings/
+
+For the full list of settings and their values, see
+https://docs.djangoproject.com/en/1.7/ref/settings/
+"""
+
+from django.conf import global_settings
+
+# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
+import os
+BASE_DIR = os.path.dirname(os.path.dirname(__file__))
+
+
+# Quick-start development settings - unsuitable for production
+# See https://docs.djangoproject.com/en/1.7/howto/deployment/checklist/
+
+# SECURITY WARNING: keep the secret key used in production secret!
+SECRET_KEY = '-4h1(4b!p-8)or!_!iw%&e89+$6(_yf#b^2e+=fc$e2)+h16m6'
+
+# SECURITY WARNING: don't run with debug turned on in production!
+DEBUG = True
+
+TEMPLATE_DEBUG = True
+
+ALLOWED_HOSTS = []
+
+
+# Application definition
+
+INSTALLED_APPS = (
+ 'django.contrib.admin',
+ 'django.contrib.auth',
+ 'django.contrib.contenttypes',
+ 'django.contrib.sessions',
+ 'django.contrib.messages',
+ 'django.contrib.staticfiles',
+ 'xstatic.pkg.chartnew_js',
+ 'django_select2',
+ 'gadjo',
+ 'bijoe',
+)
+
+MIDDLEWARE_CLASSES = (
+ 'django.contrib.sessions.middleware.SessionMiddleware',
+ 'django.middleware.common.CommonMiddleware',
+ 'django.middleware.csrf.CsrfViewMiddleware',
+ 'django.contrib.auth.middleware.AuthenticationMiddleware',
+ 'django.contrib.auth.middleware.SessionAuthenticationMiddleware',
+ 'django.contrib.messages.middleware.MessageMiddleware',
+ 'django.middleware.clickjacking.XFrameOptionsMiddleware',
+)
+
+STATICFILES_FINDERS = global_settings.STATICFILES_FINDERS + ('gadjo.finders.XStaticFinder',)
+
+
+ROOT_URLCONF = 'bijoe.urls'
+
+WSGI_APPLICATION = 'bijoe.wsgi.application'
+
+
+# Database
+# https://docs.djangoproject.com/en/1.7/ref/settings/#databases
+
+DATABASES = {
+ 'default': {
+ 'ENGINE': 'django.db.backends.sqlite3',
+ 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
+ }
+}
+
+# Internationalization
+# https://docs.djangoproject.com/en/1.7/topics/i18n/
+
+LANGUAGE_CODE = 'en-us'
+
+TIME_ZONE = 'UTC'
+
+USE_I18N = True
+
+USE_L10N = True
+
+USE_TZ = True
+
+
+# Static files (CSS, JavaScript, Images)
+# https://docs.djangoproject.com/en/1.7/howto/static-files/
+
+STATIC_URL = '/static/'
+
+# BiJoe settings
+
+LOGGING = {
+ 'version': 1,
+ 'disable_existing_loggers': True,
+ 'formatters': {
+ 'verbose': {
+ 'format': '[%(asctime)s] %(levelname)s %(name)s.%(funcName)s: %(message)s',
+ 'datefmt': '%Y-%m-%d %a %H:%M:%S'
+ },
+ },
+ 'handlers': {
+ 'console': {
+ 'level': 'DEBUG',
+ 'class': 'logging.StreamHandler',
+ 'formatter': 'verbose',
+ },
+ },
+ 'loggers': {
+ 'bijoe': {
+ 'handlers': ['console'],
+ 'level': 'DEBUG',
+ },
+ },
+}
+
+BIJOE_SCHEMAS = ['./*.model']
+
+if 'BIJOE_SETTING_FILE' in os.environ:
+ execfile(os.environ['BIJOE_SETTINGS_FILE'])
diff --git a/bijoe/templates/bijoe/base.html b/bijoe/templates/bijoe/base.html
new file mode 100644
index 0000000..111ab85
--- /dev/null
+++ b/bijoe/templates/bijoe/base.html
@@ -0,0 +1 @@
+{% extends "gadjo/base.html" %}
diff --git a/bijoe/templates/bijoe/cube.html b/bijoe/templates/bijoe/cube.html
new file mode 100644
index 0000000..34951a7
--- /dev/null
+++ b/bijoe/templates/bijoe/cube.html
@@ -0,0 +1,183 @@
+{% extends "bijoe/base.html" %}
+{% load i18n staticfiles %}
+
+{% block extrascripts %}
+{{ form.media.css }}
+<script src="{% static "xstatic/ChartNew.js" %}"></script>
+<style>
+form {
+ float: left;
+ width: 20%;
+}
+input[type=date] {
+ width: calc(100% / 2 - 8px);
+}
+select {
+ width: 100%;
+}
+table {
+ padding-left: 2em;
+}
+input[type=submit] {
+ float: right;
+ z-index: 1;
+}
+.filter { display: inline-block; }
+</style>
+{% endblock %}
+
+{% block breadcrumb %}
+
+{% url 'homepage' as homepage_url %}
+<a href="{{ homepage_url }}">{% trans "Homepage" %}</a>
+
+{% url 'warehouse' warehouse=warehouse.name as warehouse_url %}
+<a href="{{ warehouse_url }}">{{ warehouse.label }}</a>
+
+<a>{{ cube.label }}</a>
+{% endblock %}
+
+{% block content %}
+ <form method="post">
+ {% csrf_token %}
+ <input type="submit" value="ODS" name="ods"/>
+ <h2>Mesure(s)</h2>
+ {% with field=form.measures %}
+ <p {% if field.css_classes %}class="{{ field.css_classes }}"{% endif %}>
+ {{ field }}
+ {% if field.help_text %}
+ <span class="helptext">{{ field.help_text }}</span>
+ {% endif %}
+ </p>
+ {% endwith %}
+ <h2>Regroupement(s)</h2>
+ {% with field=form.drilldown %}
+ <p {% if field.css_classes %}class="{{ field.css_classes }}"{% endif %}>
+ {{ field }}
+ {% if field.help_text %}
+ <span class="helptext">{{ field.help_text }}</span>
+ {% endif %}
+ </p>
+ {% endwith %}
+ <h2>Filtre(s)</h2
+ {% for field in form %}
+ {% if not field.is_hidden and field.name != "measures" and field.name != "drilldown" %}
+ <p {% if field.css_classes %}class="{{ field.css_classes }}"{% endif %}>
+ {{ field.label_tag }}
+ {% if field.errors %}
+ <ul class="errorlist">
+ {% for error in field.errors %}
+ <li>{{ error }}</li>
+ {% endfor %}
+ </ul>
+ {% endif %}
+ {{ field }}
+ {% if field.help_text %}
+ <span class="helptext">{{ field.help_text }}</span>
+ {% endif %}
+ </p>
+ {% endif %}
+ {% endfor %}
+ </form>
+ {% if data %}
+ <table>
+ <thead>
+ {% for dimension in drilldown %}
+ <th>{{ dimension.label.capitalize }}</th>
+ {% endfor %}
+ {% for measure in measures %}
+ <th>{{ measure.label.capitalize }}</th>
+ {% endfor %}
+ </thead>
+ <tbody>
+ {% for row in data %}
+ <tr>
+ {% for cell in row %}
+ <td>{% if cell == None %}{% trans "None" %}{% else %}{{ cell }}{% endif %}</td>
+ {% endfor %}
+ </tr>
+ {% endfor %}
+ </tbody>
+ </table>
+ <canvas style="width: 100%"></canvas>
+ {% else %}
+ <div class="big-msg-info">Veuillez choisir des mesures et des regroupements</div>
+ {% endif %}
+{% endblock %}
+
+{% block page-end %}
+{{ form.media.js }}
+<script>
+$(function () {
+ $('input[type="date"]').datepicker({
+ dateFormat: 'yy-mm-dd',
+ changeMonth: true,
+ changeYear: true,
+ },
+ $.datepicker.regional['fr']);
+ $('input, select').on('change', function () {
+ $('form').submit();
+ });
+})
+var data = {{ json|safe }};
+linedata = {labels: [], datasets: []};
+for (var i = 0; i < data.length; i++) {
+ var row = data[i];
+ var label = [];
+ var datasets = linedata.datasets;
+ for (var j = 0; j < row.coords.length; j++) {
+ label.push(row.coords[j].value)
+ }
+ label = label.join(' ');
+ linedata.labels.push(label)
+ for (var j = 0; j < row.measures.length; j++) {
+ if (datasets.length < j+1) {
+ datasets.push({data: []});
+ }
+ datasets[j].label = row.measures[j].label;
+ datasets[j].axis = j+1;
+ datasets[j].data.push(row.measures[j].value);
+ }
+}
+var ctx = $('canvas')[0].getContext("2d");
+ var option = {
+ //Boolean - Whether the scale should start at zero, or an order of magnitude down from the lowest value
+ scaleBeginAtZero : true,
+
+ //Boolean - Whether grid lines are shown across the chart
+ scaleShowGridLines : true,
+
+ //String - Colour of the grid lines
+ scaleGridLineColor : "rgba(0,0,0,.05)",
+
+ //Number - Width of the grid lines
+ scaleGridLineWidth : 1,
+
+ //Boolean - Whether to show horizontal lines (except X axis)
+ scaleShowHorizontalLines: true,
+
+ //Boolean - Whether to show vertical lines (except Y axis)
+ scaleShowVerticalLines: true,
+
+ //Boolean - If there is a stroke on each bar
+ barShowStroke : true,
+
+ //Number - Pixel width of the bar stroke
+ barStrokeWidth : 2,
+
+ //Number - Spacing between each of the X value sets
+ barValueSpacing : 5,
+
+ //Number - Spacing between data sets within X values
+ barDatasetSpacing : 1,
+
+ //String - A legend template
+ responsive: true,
+ responsiveMinHeight: 300,
+ legend: true,
+ yAxisMinimumInterval: 10,
+ inGraphDataShow: true,
+ }
+new Chart(ctx).Bar(linedata, option);
+</script>
+{% endblock %}
diff --git a/bijoe/templates/bijoe/homepage.html b/bijoe/templates/bijoe/homepage.html
new file mode 100644
index 0000000..e76f709
--- /dev/null
+++ b/bijoe/templates/bijoe/homepage.html
@@ -0,0 +1,16 @@
+{% extends "bijoe/base.html" %}
+{% load i18n %}
+
+{% block breadcrumb %}
+{% url 'homepage' as homepage_url %}
+<a>{% trans "Homepage" %}</a>
+{% endblock %}
+
+{% block content %}
+<ul class="bijoe-warehouses">
+ {% for warehouse in warehouses %}
+ {% url 'warehouse' warehouse=warehouse.name as warehouse_url %}
+ <li><a href="{{ warehouse_url }}">{{ warehouse.label }}</a></li>
+ {% endfor %}
+</ul>
+{% endblock %}
diff --git a/bijoe/templates/bijoe/warehouse.html b/bijoe/templates/bijoe/warehouse.html
new file mode 100644
index 0000000..73b26dd
--- /dev/null
+++ b/bijoe/templates/bijoe/warehouse.html
@@ -0,0 +1,17 @@
+{% extends "bijoe/base.html" %}
+{% load i18n %}
+
+{% block breadcrumb %}
+{% url 'homepage' as homepage_url %}
+<a href="{{ homepage_url }}">{% trans "Homepage" %}</a>
+<a>{{ warehouse.label }}</a>
+{% endblock %}
+
+{% block content %}
+<ul class="bijoe-cubes">
+ {% for cube in warehouse.cubes %}
+ {% url 'cube' warehouse=warehouse.name cube=cube.name as cube_url %}
+ <li><a href="{{ cube_url }}">{{ cube.label }}</a></li>
+ {% endfor %}
+</ul>
+{% endblock %}
diff --git a/bijoe/urls.py b/bijoe/urls.py
new file mode 100644
index 0000000..1e8015c
--- /dev/null
+++ b/bijoe/urls.py
@@ -0,0 +1,12 @@
+from django.conf.urls import patterns, include, url
+from django.contrib import admin
+
+import views
+
+urlpatterns = patterns(
+ '',
+ url(r'^$', views.HomepageView.as_view(), name='homepage'),
+ url(r'^(?P<warehouse>[^/]*)/$', views.WarehouseView.as_view(), name='warehouse'),
+ url(r'^(?P<warehouse>[^/]*)/(?P<cube>[^/]*)/$', views.CubeView.as_view(), name='cube'),
+ url(r'^admin/', include(admin.site.urls)),
+)
diff --git a/bijoe/utils.py b/bijoe/utils.py
new file mode 100644
index 0000000..85ec596
--- /dev/null
+++ b/bijoe/utils.py
@@ -0,0 +1,14 @@
+import glob
+import json
+
+from django.conf import settings
+
+from .schemas import Warehouse
+
+
+def get_warehouses():
+ warehouses = []
+ for pattern in settings.BIJOE_SCHEMAS:
+ for path in glob.glob(pattern):
+ warehouses.append(Warehouse.from_json(json.load(open(path))))
+ return warehouses
diff --git a/bijoe/views.py b/bijoe/views.py
new file mode 100644
index 0000000..bdd0d4d
--- /dev/null
+++ b/bijoe/views.py
@@ -0,0 +1,134 @@
+import json
+import datetime
+
+from django.views.generic import TemplateView, FormView
+from django.http import Http404
+from django.utils import formats
+from django.http import HttpResponse
+
+from .utils import get_warehouses
+from .engine import Engine
+from .forms import CubeForm
+from .ods import Workbook
+
+
+class HomepageView(TemplateView):
+ template_name = 'bijoe/homepage.html'
+
+ def get_context_data(self, **kwargs):
+ ctx = super(HomepageView, self).get_context_data(**kwargs)
+ ctx['warehouses'] = get_warehouses()
+ return ctx
+
+
+class WarehouseView(TemplateView):
+ template_name = 'bijoe/warehouse.html'
+
+ def get_context_data(self, **kwargs):
+ ctx = super(WarehouseView, self).get_context_data(**kwargs)
+ try:
+ ctx['warehouse'] = [warehouse for warehouse in get_warehouses() if warehouse.name ==
+ self.kwargs['warehouse']][0]
+ except IndexError:
+ raise Http404
+ return ctx
+
+
+class CubeMixin(object):
+ def get_data(self, cleaned_data, stringify=True):
+ cleaned_data = cleaned_data
+ filters = []
+ for kw, values in cleaned_data.iteritems():
+ if values and kw.startswith('filter__'):
+ dimension_name = kw[8:]
+ filters.append((dimension_name, values))
+ measures = cleaned_data.get('measures', [])
+ drilldown = cleaned_data.get('drilldown', [])
+ data = []
+ for row in self.cube.query(filters, drilldown, measures):
+ data_row = []
+ for cell, value in row:
+ if stringify:
+ if cell.type is float:
+ value = formats.number_format(value, use_l10n=True) + u' %'
+ if isinstance(value, datetime.timedelta):
+ s = ''
+ if value.days:
+ s += '%d jour(s)' % value.days
+ if value.seconds / 3600:
+ s += ' %d heure(s)' % (value.seconds / 3600)
+ if not s:
+ s = 'moins d\'1 heure'
+ value = s
+ data_row.append(value)
+ data.append(data_row)
+ return data
+
+
+class CubeView(CubeMixin, FormView):
+ template_name = 'bijoe/cube.html'
+ form_class = CubeForm
+
+ def dispatch(self, request, *args, **kwargs):
+ try:
+ self.warehouse = Engine([warehouse for warehouse in get_warehouses() if warehouse.name
+ == self.kwargs['warehouse']][0])
+ except IndexError:
+ raise Http404
+ try:
+ self.cube = self.warehouse[self.kwargs['cube']]
+ except KeyError:
+ raise Http404
+ return super(CubeView, self).dispatch(request, *args, **kwargs)
+
+ def get_form_kwargs(self):
+ kwargs = super(CubeView, self).get_form_kwargs()
+ kwargs['cube'] = self.cube
+ return kwargs
+
+ def form_valid(self, form):
+ if 'ods' in self.request.POST:
+ return self.ods(form)
+ else:
+ return self.form_invalid(form)
+
+ def ods(self, form):
+ workbook = Workbook()
+ sheet = workbook.add_sheet(self.cube.label)
+ ctx = self.get_context_data(form=form)
+ for j, m in enumerate(ctx['drilldown'] + ctx['measures']):
+ sheet.write(0, j, m.label)
+ for i, row in enumerate(ctx['data']):
+ for j, cell in enumerate(row):
+ sheet.write(i + 1, j, unicode(cell))
+ response = HttpResponse(content_type='application/vnd.oasis.opendocument.spreadsheet')
+ response['Content-Disposition'] = 'attachment; filename=%s.ods' % self.cube.name
+ workbook.save(response)
+ return response
+
+ def get_context_data(self, **kwargs):
+ ctx = super(CubeView, self).get_context_data(**kwargs)
+ ctx['warehouse'] = self.warehouse
+ ctx['cube'] = self.cube
+
+ form = ctx['form']
+ if form.is_valid():
+ ctx['data'] = self.get_data(form.cleaned_data)
+ ctx['measures'] = [self.cube.measures[measure] for measure in
+ form.cleaned_data['measures']]
+ ctx['drilldown'] = [self.cube.dimensions[dimension] for dimension in
+ form.cleaned_data['drilldown']]
+ json_data = []
+ for row in self.get_data(form.cleaned_data, stringify=False):
+ coords = []
+ for dimension, cell in zip(ctx['drilldown'], row):
+ coords.append({'label': dimension.label, 'value': cell})
+ measures = []
+ for measure, cell in zip(ctx['measures'], row[len(ctx['drilldown']):]):
+ if isinstance(cell, datetime.timedelta):
+ cell = cell.days + cell.seconds / 86400.
+ measures.append({'label': measure.label, 'value': cell})
+ json_data.append({'coords': coords, 'measures': measures})
+
+ ctx['json'] = json.dumps(json_data, indent=2)
+ return ctx
diff --git a/bijoe/wsgi.py b/bijoe/wsgi.py
new file mode 100644
index 0000000..907103b
--- /dev/null
+++ b/bijoe/wsgi.py
@@ -0,0 +1,14 @@
+"""
+WSGI config for bijoe project.
+
+It exposes the WSGI callable as a module-level variable named ``application``.
+
+For more information on this file, see
+https://docs.djangoproject.com/en/1.7/howto/deployment/wsgi/
+"""
+
+import os
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "bijoe.settings")
+
+from django.core.wsgi import get_wsgi_application
+application = get_wsgi_application()
diff --git a/setup.py b/setup.py
index 3229d20..e6c906e 100644
--- a/setup.py
+++ b/setup.py
@@ -42,7 +42,7 @@ def get_version():
return '0.0.0'
-setup(name="wcs-olap",
+setup(name="publik-bi",
version=get_version(),
license="AGPLv3+",
description="Export w.c.s. data to an OLAP cube",
@@ -54,8 +54,10 @@ setup(name="wcs-olap",
maintainer_email="bdauvergne@entrouvert.com",
packages=find_packages(),
include_package_data=True,
- install_requires=['requests','psycopg2', 'isodate'],
+ install_requires=['requests', 'django', 'psycopg2', 'isodate', 'Django-Select2',
+ 'XStatic-ChartNew.js'],
entry_points={
'console_scripts': ['wcs-olap=wcs_olap.cmd:main'],
},
+ scripts=['bijoe-ctl'],
cmdclass={'sdist': eo_sdist})
diff --git a/tests/test_bijoe_schemas.py b/tests/test_bijoe_schemas.py
new file mode 100644
index 0000000..9b426d5
--- /dev/null
+++ b/tests/test_bijoe_schemas.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+from bijoe.schemas import Warehouse
+
+
+def test_simple_parsing():
+ Warehouse.from_json({
+ 'name': 'coin',
+ 'label': 'coin',
+ 'pg_dsn': 'dbname=zozo',
+ 'search_path': ['cam', 'public'],
+ 'cubes': [
+ {
+ 'name': 'all_formdata',
+ 'label': 'Tous les formulaires',
+ 'fact_table': 'formdata',
+ 'key': 'id',
+ 'joins': [
+ {
+ 'name': 'formdef',
+ 'master': '{fact_table}.formdef_id',
+ 'table': 'formdef',
+ 'detail': 'formdef.id',
+ }
+ ],
+ 'dimensions': [
+ {
+ 'label': 'formulaire',
+ 'name': 'formdef',
+ 'type': 'integer',
+ 'join': ['formdef'],
+ 'value': 'formdef.id',
+ 'value_label': 'formdef.label',
+ 'order_by': 'formdef.label'
+ },
+ {
+ 'name': 'receipt_time',
+ 'label': 'date de soumission',
+ 'join': ['receipt_time'],
+ 'type': 'date',
+ 'value': 'receipt_time.date'
+ }
+ ],
+ 'measures': [
+ {
+ 'type': 'integer',
+ 'label': 'Nombre de demandes',
+ 'expression': 'count({fact_table}.id)',
+ 'name': 'count'
+ },
+ {
+ 'type': 'integer',
+ 'label': u'Délai de traitement',
+ 'expression': 'avg((to_char(endpoint_delay, \'9999.999\') || \' days\')::interval)',
+ 'name': 'avg_endpoint_delay'
+ },
+ {
+ 'type': 'percent',
+ 'label': 'Pourcentage',
+ 'expression': 'count({fact_table}.id) * 100. / (select count({fact_table}.id) from {table_expression} where {where_conditions})',
+ 'name': 'percentage'
+ }
+ ]
+ }
+ ],
+ })
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..9e61007
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,19 @@
+# Tox (http://tox.testrun.org/) is a tool for running tests
+# in multiple virtualenvs. This configuration file will run the
+# test suite on all supported python versions. To use it, "pip install tox"
+# and then run "tox" from this directory.
+
+[tox]
+toxworkdir = {env:TMPDIR:/tmp}/tox-{env:USER}/publik-bi/
+
+[testenv]
+usedevelop = true
+setenv =
+ coverage: COVERAGE=--junit-xml=junit.xml --cov=src --cov-report xml
+deps =
+ coverage
+ pytest
+ pytest-cov
+ pytest-random
+commands =
+ py.test {env:COVERAGE:} {posargs:--random tests}
diff --git a/wcs_olap/cmd.py b/wcs_olap/cmd.py
index 4aadf66..97985dc 100644
--- a/wcs_olap/cmd.py
+++ b/wcs_olap/cmd.py
@@ -43,10 +43,13 @@ def main2():
'postgresql DB', add_help=False)
parser.add_argument('config_path', nargs='?', default=None)
group = parser.add_mutually_exclusive_group()
+ parser.add_argument('--no-feed', dest='feed', help='only produce the model',
+ action='store_false', default=True)
group.add_argument("-a", "--all", help="synchronize all wcs", action='store_true',
default=False)
group.add_argument('--url', help='url of the w.c.s. instance', required=False, default=None)
args, rest = parser.parse_known_args()
+ feed = args.feed
config = get_config(path=args.config_path)
# list all known urls
urls = [url for url in config.sections() if url.startswith('http://') or
@@ -90,7 +93,7 @@ def main2():
verify=defaults.get('verify', 'True') == 'True')
logger.info('starting synchronizing w.c.s. at %r with PostgreSQL at %s', url, pg_dsn)
feeder = WcsOlapFeeder(api=api, schema=schema, pg_dsn=pg_dsn, logger=logger,
- config=defaults)
+ config=defaults, do_feed=feed)
feeder.feed()
logger.info('finished')
defaults = {}
diff --git a/wcs_olap/feeder.py b/wcs_olap/feeder.py
index 836f56d..85ed77e 100644
--- a/wcs_olap/feeder.py
+++ b/wcs_olap/feeder.py
@@ -43,7 +43,7 @@ class Context(object):
class WcsOlapFeeder(object):
- def __init__(self, api, pg_dsn, schema, logger=None, config=None):
+ def __init__(self, api, pg_dsn, schema, logger=None, config=None, do_feed=True):
self.api = api
self.logger = logger or Whatever()
self.schema = schema
@@ -53,6 +53,7 @@ class WcsOlapFeeder(object):
self.formdefs = api.formdefs
self.roles = api.roles
self.categories = api.categories
+ self.do_feed = do_feed
self.ctx = Context()
self.ctx.push({
'schema': self.schema,
@@ -72,275 +73,139 @@ class WcsOlapFeeder(object):
self.model = {
'label': self.config.get('cubes_label', schema),
'name': schema,
- 'browser_options': {
- 'schema': schema,
- },
+ 'search_path': [schema, 'public'],
+ 'pg_dsn': pg_dsn,
+ 'cubes': [],
+ }
+ cube = {
+ 'name': 'all_formdata',
+ 'label': u'Tous les formulaires',
+ 'fact_table': 'formdata',
+ 'key': 'id',
'joins': [
{
'name': 'receipt_time',
+ 'table': 'dates',
+ 'detail': 'date',
'master': 'receipt_time',
- 'detail': {
- 'table': 'dates',
- 'column': 'date',
- 'schema': 'public',
- },
- 'method': 'detail',
- 'alias': 'dates',
},
{
'name': 'channel',
+ 'table': 'channel',
'master': 'channel_id',
- 'detail': '{channel_table}.id',
- 'method': 'detail',
- },
- {
- 'name': 'role',
- 'detail': '{role_table}.id',
- 'method': 'detail',
+ 'detail': 'id',
},
{
'name': 'formdef',
+ 'table': 'formdef',
'master': 'formdef_id',
- 'detail': '{form_table}.id',
- 'method': 'detail',
+ 'detail': 'id',
},
{
'name': 'category',
- 'master': '{form_table}.category_id',
- 'detail': '{category_table}.id',
+ 'table': 'category',
+ 'master': 'formdef.category_id',
+ 'detail': 'id',
+ 'kind': 'left',
},
{
'name': 'hour',
+ 'table': 'hour',
'master': 'hour_id',
- 'detail': '{hour_table}.id',
- 'method': 'detail',
+ 'detail': 'id',
},
{
'name': 'generic_status',
+ 'table': 'status',
'master': 'generic_status_id',
- 'detail': '{generic_status_table}.id',
- 'method': 'detail',
+ 'detail': 'id',
},
],
'dimensions': [
{
- 'label': 'date de soumission',
'name': 'receipt_time',
- 'role': 'time',
- 'levels': [
- {
- 'name': 'year',
- 'label': 'année',
- 'role': 'year',
- 'order_attribute': 'year',
- 'order': 'asc',
- },
- {
- 'name': 'quarter',
- 'order_attribute': 'quarter',
- 'label': 'trimestre',
- 'role': 'quarter',
- },
- {
- 'name': 'month',
- 'label': 'mois',
- 'role': 'month',
- 'attributes': ['month', 'month_name'],
- 'order_attribute': 'month',
- 'label_attribute': 'month_name',
- 'order': 'asc',
- },
- {
- 'name': 'week',
- 'label': 'semaine',
- 'role': 'week',
- },
- {
- 'name': 'day',
- 'label': 'jour',
- 'role': 'day',
- 'order': 'asc',
- },
- {
- 'name': 'dow',
- 'label': 'jour de la semaine',
- 'attributes': ['dow', 'dow_name'],
- 'order_attribute': 'dow',
- 'label_attribute': 'dow_name',
- 'order': 'asc',
- },
- ],
- 'hierarchies': [
- {
- 'name': 'default',
- 'label': 'par défaut',
- 'levels': ['year', 'month', 'day']
- },
- {
- 'name': 'quarterly',
- 'label': 'par trimestre',
- 'levels': ['year', 'quarter']
- },
- {
- 'name': 'weekly',
- 'label': 'par semaine',
- 'levels': ['year', 'week']
- },
- {
- 'name': 'dowly',
- 'label': 'par jour de la semaine',
- 'levels': ['dow']
- },
- ]
+ 'label': 'date de la demande',
+ 'join': ['receipt_time'],
+ 'type': 'date',
+ 'value': 'receipt_time.date',
},
{
- 'label': 'canaux',
- 'name': 'channels',
+ 'name': 'channel',
+ 'label': 'canal',
+ 'join': ['channel'],
+ 'type': 'integer',
+ 'value': 'channel.id',
+ 'value_label': 'channel.label',
},
{
- 'label': 'catégories',
- 'name': 'categories',
+ 'name': 'category',
+ 'label': 'catégorie',
+ 'join': ['formdef', 'category'],
+ 'type': 'integer',
+ 'value': 'category.id',
+ 'value_label': 'category.label',
},
{
- 'label': 'formulaire',
'name': 'formdef',
+ 'label': 'formulaire',
+ 'join': ['formdef'],
+ 'type': 'integer',
+ 'value': 'formdef.id',
+ 'value_label': 'formdef.label',
},
{
- 'label': 'statuts génériques',
- 'name': 'generic_statuses',
+ 'name': 'generic_status',
+ 'label': 'statut générique',
+ 'join': ['generic_status'],
+ 'type': 'integer',
+ 'value': 'generic_status.id',
+ 'value_label': 'generic_status.label',
},
{
+ 'name': 'hour',
'label': 'heure',
- 'name': 'hours',
- 'levels': [
- {
- 'name': 'hours',
- 'attributes': ['hour_id', 'hour_label'],
- 'order_attribute': 'hour_id',
- 'label_attribute': 'hour_label',
- }
- ]
- },
+ 'join': ['hour'],
+ 'type': 'integer',
+ 'value': 'hour.id',
+ 'filter': False,
+ }
],
- 'mappings': {
- 'receipt_time.year': {
- 'table': 'dates',
- 'column': 'date',
- 'schema': 'public',
- 'extract': 'year',
- },
- 'receipt_time.month': {
- 'table': 'dates',
- 'column': 'date',
- 'schema': 'public',
- 'extract': 'month'
- },
- 'receipt_time.month_name': {
- 'table': 'dates',
- 'schema': 'public',
- 'column': 'month'
- },
- 'receipt_time.week': {
- 'table': 'dates',
- 'column': 'date',
- 'schema': 'public',
- 'extract': 'week'
- },
- 'receipt_time.day': {
- 'table': 'dates',
- 'column': 'date',
- 'schema': 'public',
- 'extract': 'day'
- },
- 'receipt_time.dow': {
- 'table': 'dates',
- 'column': 'date',
- 'schema': 'public',
- 'extract': 'dow'
+ 'measures': [
+ {
+ 'name': 'count',
+ 'label': 'nombre de demandes',
+ 'type': 'integer',
+ 'expression': 'count({fact_table}.id)',
},
- 'receipt_time.dow_name': {
- 'table': 'dates',
- 'schema': 'public',
- 'column': 'day',
+ {
+ 'name': 'avg_endpoint_delay',
+ 'label': 'délai de traitement moyen',
+ 'type': 'duration',
+ 'expression': 'avg(endpoint_delay)',
},
- 'receipt_time.quarter': {
- 'table': 'dates',
- 'column': 'date',
- 'schema': 'public',
- 'extract': 'quarter'
+ {
+ 'name': 'max_endpoint_delay',
+ 'label': 'délai de traitement maximum',
+ 'type': 'duration',
+ 'expression': 'max(endpoint_delay)',
},
- 'formdef': 'formdef.label',
- 'channels': 'channel.label',
- 'categories': 'category.label',
- 'generic_statuses': 'status.label',
- 'hours.hour_label': '{hour_table}.label',
- 'hours.hour_id': '{hour_table}.id',
- },
- 'cubes': [
{
- 'name': schema + '_formdata',
- 'label': 'Toutes les demandes (%s)' % schema,
- 'key': 'id',
- 'fact': 'formdata',
- 'dimensions': [
- 'receipt_time',
- 'hours',
- 'channels',
- 'categories',
- 'formdef',
- 'generic_statuses',
- ],
- 'joins': [
- {
- 'name': 'receipt_time',
- },
- {
- 'name': 'hour',
- },
- {
- 'name': 'channel',
- },
- {
- 'name': 'formdef',
- },
- {
- 'name': 'category',
- },
- {
- 'name': 'generic_status',
- },
- ],
- 'measures': [
- {
- 'name': 'endpoint_delay',
- 'label': 'délai de traitement',
- 'nonadditive': 'all',
- },
- ],
- 'aggregates': [
- {
- 'name': 'record_count',
- 'label': 'nombre de demandes',
- 'function': 'count'
- },
- {
- 'name': 'endpoint_delay_max',
- 'label': 'délai de traitement maximum',
- 'measure': 'endpoint_delay',
- 'function': 'max',
- },
- {
- 'name': 'endpoint_delay_avg',
- 'label': 'délai de traitement moyen',
- 'measure': 'endpoint_delay',
- 'function': 'avg',
- },
- ],
+ 'name': 'min_endpoint_delay',
+ 'label': 'délai de traitement minimum',
+ 'type': 'duration',
+ 'expression': 'min(endpoint_delay)',
},
- ],
+ {
+ 'name': 'percent',
+ 'label': 'Pourcentage des demandes',
+ 'type': 'percent',
+ "expression": 'count({fact_table}.id) * 100. '
+ '/ (select count({fact_table}.id) from {table_expression} '
+ 'where {where_conditions})',
+ }
+ ]
}
- # apply table names
- self.model = self.tpl(self.model)
+ self.model['cubes'].append(cube)
self.base_cube = self.model['cubes'][0]
def hash_table_name(self, table_name):
@@ -473,39 +338,32 @@ class WcsOlapFeeder(object):
['id', 'serial primary key'],
['formdef_id', 'smallint REFERENCES {form_table} (id)'],
['receipt_time', 'date'],
- ['year_id', 'smallint REFERENCES {year_table} (id)'],
- ['month_id', 'smallint REFERENCES {month_table} (id)'],
['hour_id', 'smallint REFERENCES {hour_table} (id)'],
- ['day_id', 'smallint REFERENCES {day_table} (id)'],
- ['dow_id', 'smallint REFERENCES {dow_table} (id)'],
['channel_id', 'smallint REFERENCES {channel_table} (id)'],
['backoffice', 'boolean'],
['generic_status_id', 'smallint REFERENCES {generic_status_table} (id)'],
- ['endpoint_delay', 'real'],
+ ['endpoint_delay', 'interval'],
]
self.comments = {
- 'formdef_id': u'dim$formulaire',
- 'receipt_time': u'time$date de réception',
- 'year_id': u'dim$année',
- 'month_id': u'dim$mois',
- 'hour_id': u'dim$heure',
- 'day_id': u'dim$jour',
- 'dow_id': u'dim$jour de la semaine',
- 'channel_id': u'dim$canal',
- 'backoffice': u'dim$soumission backoffce',
- 'generic_status_id': u'dim$statut générique',
- 'endpoint_delay': u'measure$délai de traitement',
+ 'formdef_id': u'formulaire',
+ 'receipt_time': u'date de réception',
+ 'hour_id': u'heure',
+ 'channel_id': u'canal',
+ 'backoffice': u'soumission backoffce',
+ 'generic_status_id': u'statut générique',
+ 'endpoint_delay': u'délai de traitement',
}
self.create_table('{generic_formdata_table}', self.columns)
for at, comment in self.comments.iteritems():
self.ex('COMMENT ON COLUMN {generic_formdata_table}.%s IS %%s' % at, vars=(comment,))
def feed(self):
- self.do_schema()
- self.do_base_table()
+ if self.do_feed:
+ self.do_schema()
+ self.do_base_table()
for formdef in self.formdefs:
try:
- formdef_feeder = WcsFormdefFeeder(self, formdef)
+ formdef_feeder = WcsFormdefFeeder(self, formdef, do_feed=self.do_feed)
formdef_feeder.feed()
except WcsApiError, e:
# ignore authorization errors
@@ -513,14 +371,19 @@ class WcsOlapFeeder(object):
and e.args[2].response.status_code == 403):
continue
self.logger.error('failed to retrieve formdef %s', formdef.slug)
+ if 'cubes_model_dirs' in self.config:
+ model_path = os.path.join(self.config['cubes_model_dirs'], '%s.model' % self.schema)
+ with open(model_path, 'w') as f:
+ json.dump(self.model, f, indent=2, sort_keys=True)
class WcsFormdefFeeder(object):
- def __init__(self, olap_feeder, formdef):
+ def __init__(self, olap_feeder, formdef, do_feed=True):
self.olap_feeder = olap_feeder
self.formdef = formdef
self.status_mapping = {}
self.items_mappings = {}
+ self.do_feed = do_feed
self.fields = []
@property
@@ -614,25 +477,16 @@ class WcsFormdefFeeder(object):
continue
status = data.formdef.schema.workflow.statuses_map[data.workflow.status.id]
- if data.endpoint_delay:
- endpoint_delay = (data.endpoint_delay.days + float(data.endpoint_delay.seconds) /
- 86400.)
- else:
- endpoint_delay = None
row = {
'formdef_id': self.formdef_sql_id,
'receipt_time': data.receipt_time,
- 'year_id': data.receipt_time.year,
- 'month_id': data.receipt_time.month,
- 'day_id': data.receipt_time.day,
'hour_id': data.receipt_time.hour,
- 'dow_id': data.receipt_time.weekday(),
'channel_id': self.channel_to_id[data.submission.channel.lower()],
'backoffice': data.submission.backoffice,
# FIXME "En cours"/2 is never used
'generic_status_id': 3 if status.endpoint else 1,
'status_id': self.status_mapping[data.workflow.status.id],
- 'endpoint_delay': endpoint_delay,
+ 'endpoint_delay': data.endpoint_delay,
}
# add form fields value
for field in self.fields:
@@ -673,78 +527,78 @@ class WcsFormdefFeeder(object):
})
# create cube
- self.cube = copy.deepcopy(self.base_cube)
- self.cube.update({
- 'name': self.schema + '_' + self.table_name,
+ cube = self.cube = copy.deepcopy(self.base_cube)
+ cube.update({
+ 'name': self.table_name,
'label': self.formdef.schema.name,
- 'fact': self.table_name,
+ 'fact_table': self.table_name,
+ 'key': 'id',
})
+ cube['dimensions'] = [dimension for dimension in cube['dimensions']
+ if dimension['name'] not in ('category', 'formdef')]
+
# add dimension for status
- self.cube['joins'].append({
+ cube['joins'].append({
+ 'name': 'status',
+ 'table': self.status_table_name,
'master': 'status_id',
- 'detail': '%s.id' % self.status_table_name,
- 'method': 'detail',
+ 'detail': 'id',
})
- dim_name = '%s_%s' % (self.table_name, 'status')
- self.model['dimensions'].append({
- 'name': dim_name,
+ cube['dimensions'].append({
+ 'name': 'status',
'label': 'statut',
- 'levels': [
- {
- 'name': 'status',
- 'attributes': ['status_id', 'status_label'],
- 'order_attribute': 'status_id',
- 'label_attribute': 'status_label',
- },
- ],
+ 'join': ['status'],
+ 'type': 'integer',
+ 'value': 'status.id',
+ 'value_label': 'status.label',
})
- self.model['mappings']['%s.status_id' % dim_name] = '%s.id' % self.status_table_name
- self.model['mappings']['%s.status_label' % dim_name] = '%s.label' % self.status_table_name
- self.cube['dimensions'].append(dim_name)
# add dimension for function
for function, name in self.formdef.schema.workflow.functions.iteritems():
at = 'function_%s' % slugify(function)
- dim_name = '%s_function_%s' % (self.table_name, slugify(function))
- self.cube['joins'].append({
+ cube['joins'].append({
+ 'name': at,
+ 'table': 'role',
'master': at,
- 'detail': self.tpl('{role_table}.id'),
- 'alias': at,
+ 'detail': 'id',
})
- self.model['dimensions'].append({
- 'name': dim_name,
+ cube['dimensions'].append({
+ 'name': at,
'label': u'fonction %s' % name,
+ 'join': [at],
+ 'type': 'integer',
+ 'value': '%s.id' % at,
+ 'value_label': '%s.label' % at,
+ 'filter': False,
})
- self.model['mappings'][dim_name] = '%s.label' % at
- self.cube['dimensions'].append(dim_name)
# add dimensions for item fields
for field in self.fields:
if field.type != 'item':
continue
table_name = self.hash_table_name('{formdata_table}_field_%s' % field.varname)
- self.cube['joins'].append({
+ cube['joins'].append({
+ 'name': field.varname,
+ 'table': 'table_name',
'master': 'field_%s' % field.varname,
- 'detail': '%s.id' % table_name,
- 'method': 'detail',
+ 'detail': 'id' % table_name,
})
- dim_name = '%s_%s' % (self.table_name. field.varname)
- self.model['dimensions'].append({
- 'name': dim_name,
+ cube['dimensions'].append({
+ 'name': field.varname,
'label': field.label,
+ 'join': [field.varname],
+ 'type': 'integer',
+ 'value': '%s.id' % field.varname,
+ 'value_label': '%s.label' % field.varname,
+ 'filter': field.in_filters,
})
- self.model['mappings'][dim_name] = '%s.label' % table_name
- self.cube['dimensions'].append(dim_name)
-
- self.model['cubes'].append(self.cube)
- try:
- self.logger.info('feed formdef %s', self.formdef.slug)
- self.do_statuses()
- self.do_data_table()
- self.do_data()
- finally:
- self.olap_feeder.ctx.pop()
- if 'cubes_model_dirs' in self.config:
- model_path = os.path.join(self.config['cubes_model_dirs'], '%s.json' % self.schema)
- with open(model_path, 'w') as f:
- json.dump(self.model, f, indent=2, sort_keys=True)
+
+ self.model['cubes'].append(cube)
+ if self.do_feed:
+ try:
+ self.logger.info('feed formdef %s', self.formdef.slug)
+ self.do_statuses()
+ self.do_data_table()
+ self.do_data()
+ finally:
+ self.olap_feeder.ctx.pop()