This repository has been archived on 2023-02-21. You can view files and clone it, but cannot push or open issues or pull requests.
publik-bi/bijoe/schemas.py

298 lines
8.5 KiB
Python

# -*- coding: utf-8 -*-
import datetime
import decimal
import collections
# Type names understood by type_cast() / type_to_json(), mapped to the Python
# type that represents them.
TYPE_MAP = dict(
    duration=datetime.timedelta,
    date=datetime.date,
    integer=int,
    decimal=decimal.Decimal,
    percent=float,
)
class SchemaError(Exception):
    """Error in a schema definition, e.g. a name used more than once."""
def type_to_json(t):
    """Return the TYPE_MAP name whose Python type is t, or None if there is none."""
    matching_names = (
        name for name, python_type in TYPE_MAP.items() if t is python_type
    )
    return next(matching_names, None)
def type_cast(t):
    """Turn a type specification into a Python type.

    A value that already is a type is returned unchanged; anything else is
    looked up as a key of TYPE_MAP, which raises KeyError when it is unknown.
    """
    return t if isinstance(t, type) else TYPE_MAP[t]
# Base.to_json() calls the ``to_json`` attribute of an attribute's declared
# converter when it has one; this makes attributes converted by type_cast
# serialise back to their TYPE_MAP name.
type_cast.to_json = type_to_json
class Base(object):
    """Common base of the schema objects, providing JSON (de)serialisation.

    Subclasses list their attribute names in ``__slots__`` and may declare a
    converter per attribute in ``__types__``.  A converter is a callable, a
    class with a ``from_json`` classmethod, or a one-element list holding one
    of those to declare a list of such values.
    """
    __types__ = {}

    def __init__(self, **kwargs):
        """Set every keyword argument as an attribute of the new object."""
        # items() instead of the Python-2-only iteritems(): same result on
        # Python 2, and construction no longer fails on Python 3.
        for name, value in kwargs.items():
            setattr(self, name, value)

    @classmethod
    def slots(cls):
        """Return the set of all names found in ``__slots__`` along the MRO."""
        names = set()
        for klass in reversed(cls.__mro__):
            if hasattr(klass, '__slots__'):
                names.update(klass.__slots__)
        return names

    @classmethod
    def types(cls):
        """Return the ``__types__`` mappings found along the MRO, merged.

        The MRO is walked in reverse so that the entries of a subclass
        override those of its bases.
        """
        merged = {}
        for klass in reversed(cls.__mro__):
            if hasattr(klass, '__types__'):
                merged.update(klass.__types__)
        return merged

    @classmethod
    def from_json(cls, d):
        """Build an instance of cls from the mapping d.

        Only the keys present in d are set on the instance; each value first
        goes through the converter declared for its key in ``__types__``, if
        there is one.

        Raises AssertionError when d has no ``keys`` attribute or holds keys
        that are not declared slots (checks are ``assert`` statements, hence
        disabled under ``python -O``).
        """
        assert hasattr(d, 'keys')
        slots = cls.slots()
        assert set(d.keys()) <= set(slots), \
            'given keys %r does not match %s.__slots__: %r' % (
                list(d.keys()), cls.__name__, slots)
        types = cls.types()

        kwargs = {}
        for key in slots:
            # NOTE(review): every name in __slots__ also exists as a class
            # attribute (slot descriptor or class-level default), so
            # hasattr() looks always true here and this check probably never
            # fires -- confirm before relying on it to catch missing keys.
            assert key in d or hasattr(cls, key), \
                '%s.%s is a mandatory attribute' % (cls.__name__, key)
            if key not in d:
                continue
            value = d[key]
            if key in types:
                kls = types[key]
                if isinstance(kls, list):
                    # A one-element list declares "list of <element type>".
                    kls = kls[0]
                    convert = getattr(kls, 'from_json', kls)
                    value = [convert(v) for v in value]
                else:
                    convert = getattr(kls, 'from_json', kls)
                    value = convert(value)
            kwargs[key] = value
        return cls(**kwargs)

    def to_json(self):
        """Return a dict of the attributes that are set, converted for JSON.

        A value whose declared converter has a ``to_json`` attribute is passed
        through it; the elements of a list value are serialised with their
        own ``to_json`` method when they have one.  Slots for which attribute
        lookup fails are left out.
        """
        d = {}
        types = self.types()
        for attr in self.slots():
            try:
                v = getattr(self, attr)
            except AttributeError:
                # Slot never assigned and no class-level default: skip it.
                continue
            if attr in types and hasattr(types[attr], 'to_json'):
                v = types[attr].to_json(v)
            if isinstance(v, list):
                v = [x.to_json() if hasattr(x, 'to_json') else x for x in v]
            d[attr] = v
        return d

    def __repr__(self):
        """Return ``<ClassName name=value ...>`` for the attributes that are set."""
        # slots() is a set; sorting keeps the output stable between runs.
        kwargs = ['%s=%r' % (key, getattr(self, key))
                  for key in sorted(self.slots()) if hasattr(self, key)]
        return '<%s %s>' % (self.__class__.__name__, ' '.join(kwargs))
class Measure(Base):
    """Schema object with a name, a label, a type and an expression."""
    __slots__ = ['name', 'label', 'type', 'expression']
    # Converters applied by Base.from_json() to the values of the same name.
    __types__ = dict(
        name=str,
        label=unicode,
        type=type_cast,
        expression=str,
    )
class Dimension(Base):
    """Schema object describing a dimension and how to filter on it.

    NOTE(review): ``filter`` is declared both as a boolean schema attribute
    (``__slots__`` / ``__types__``) and as a method.  The method definition
    rebinds the class attribute, so the ``filter = True`` default below never
    takes effect, and an instance created with an explicit ``filter=...``
    value shadows the method (calling ``instance.filter(values)`` then fails
    because a bool is not callable).  ``build_filter()`` holds the actual
    logic under a name that cannot be shadowed; ``filter()`` is kept as a
    thin wrapper for existing callers.
    """
    __slots__ = ['name', 'label', 'type', 'join', 'value', 'value_label',
                 'order_by', 'group_by', 'filter_in_join', 'filter']
    __types__ = {
        'name': str,
        'label': unicode,
        'type': type_cast,
        'join': [str],
        'value': str,
        'value_label': str,
        'order_by': str,
        'group_by': str,
        'filter': bool,
    }

    label = None
    value_label = None
    order_by = None
    group_by = None
    join = None
    # Overwritten by the ``filter`` method defined further down (see the
    # class docstring); kept so the class body is otherwise unchanged.
    filter = True

    @property
    def dimensions(self):
        """Return this dimension followed by the dimensions derived from it.

        A dimension whose type is ``datetime.date`` also yields year, month
        and day-of-week dimensions computed from its value expression, each
        created with ``filter=False``; any other dimension yields only itself.
        """
        if self.type is datetime.date:
            return [
                self,
                Dimension(
                    label=u'année (%s)' % self.label,
                    name=self.name + '__year',
                    type='integer',
                    join=self.join,
                    value='EXTRACT(year from %s)::integer' % self.value,
                    filter=False),
                Dimension(
                    label=u'mois (%s)' % self.label,
                    name=self.name + '__month',
                    type='integer',
                    join=self.join,
                    value='EXTRACT(month from %s)' % self.value,
                    value_label='to_char(date_trunc(\'month\', %s), \'TMmonth\')' % self.value,
                    group_by='EXTRACT(month from %s), '
                             'to_char(date_trunc(\'month\', %s), \'TMmonth\')'
                             % (self.value, self.value),
                    filter=False),
                Dimension(
                    label=u'jour de la semaine (%s)' % self.label,
                    name=self.name + '__dow',
                    type='integer',
                    join=self.join,
                    value='EXTRACT(dow from %s)' % self.value,
                    value_label='to_char(date_trunc(\'week\', current_date)::date '
                                '+ EXTRACT(dow from %s)::integer, \'TMday\')' % self.value,
                    filter=False)
            ]
        return [self]

    def build_filter(self, filter_values):
        """Return ``(condition, parameters)`` restricting this dimension.

        ``condition`` is a string using ``%s`` placeholders and
        ``parameters`` the matching list of values; ``('', [])`` is returned
        when there is nothing to filter on.

        For a ``datetime.date`` dimension, filter_values must be a pair
        ``(lower, upper)`` where a false bound is ignored; otherwise it is the
        collection of accepted values, converted with ``int()`` for integer
        dimensions (ValueError/TypeError from ``int()`` propagate).
        """
        if self.type is datetime.date:
            assert len(filter_values) == 2
            filters = []
            values = []
            for operator, index in (('>=', 0), ('<=', 1)):
                # The length test only matters when asserts are disabled
                # (python -O): a missing bound is then simply skipped.
                if index < len(filter_values) and filter_values[index]:
                    filters.append('%s %s %%s' % (self.value, operator))
                    values.append(filter_values[index])
            return ' AND '.join(filters), values

        if not filter_values:
            return '', []
        # The type is the Python type ``int`` once it went through type_cast
        # (from_json) but the string 'integer' on the dimensions generated by
        # the ``dimensions`` property: accept both.
        if self.type in (int, 'integer'):
            # A real list (not a lazy map object) so len() works below.
            values = [int(v) for v in filter_values]
        else:
            values = filter_values
        s = ', '.join(['%s'] * len(values))
        return '%s IN (%s)' % (self.value, s), values

    def filter(self, filter_values):
        """Backward-compatible wrapper around ``build_filter()``."""
        return self.build_filter(filter_values)
def join_kind(kind):
    """Validate a join kind and return it unchanged.

    Raises ValueError unless kind is 'inner', 'left' or 'right'.
    """
    if kind not in ('inner', 'left', 'right'):
        # Wrap kind in a 1-tuple: a tuple value would otherwise be unpacked
        # as %-format arguments and raise TypeError instead of ValueError.
        raise ValueError('bad join kind: %s' % (kind,))
    return kind
class Join(Base):
    """Schema object with a name, a table, master and detail, and a kind."""
    __slots__ = ['name', 'table', 'master', 'detail', 'kind']
    # Converters applied by Base.from_json(); join_kind also validates.
    __types__ = dict(
        name=str,
        table=str,
        master=str,
        detail=str,
        kind=join_kind,
    )

    # Kind used when none is given.
    kind = 'right'
class Cube(Base):
    """Schema object grouping joins, dimensions and measures on a fact table."""
    __slots__ = ['name', 'label', 'fact_table', 'key', 'joins', 'dimensions', 'measures']
    __types__ = {
        'name': str,
        'label': unicode,
        'fact_table': str,
        'key': str,
        'joins': [Join],
        'dimensions': [Dimension],
        'measures': [Measure],
    }

    joins = ()
    dimensions = ()
    measures = ()

    def check(self):
        """Raise SchemaError if a name is shared by joins, dimensions or measures.

        All three kinds are counted together, so a join and a measure with
        the same name are reported as duplicates too.
        """
        names = collections.Counter()
        names.update(join.name for join in self.joins)
        names.update(dimension.name for dimension in self.dimensions)
        names.update(measure.name for measure in self.measures)
        # items() works on Python 2 and 3 (iteritems() is Python 2 only);
        # sorting keeps the error message stable.
        duplicates = sorted(k for k, v in names.items() if v > 1)
        if duplicates:
            raise SchemaError(
                'More than one join, dimension or measure with name(s) %s' % ', '.join(duplicates))

    @property
    def all_dimensions(self):
        """Iterate over every dimension, including the derived ones.

        Each declared dimension contributes the content of its own
        ``dimensions`` property.
        """
        for dimension in self.dimensions:
            for sub_dimension in dimension.dimensions:
                yield sub_dimension

    def get_dimension(self, name):
        """Return the declared or derived dimension called name.

        Raises KeyError(name) when there is none.
        """
        for dimension in self.all_dimensions:
            if dimension.name == name:
                return dimension
        raise KeyError(name)

    def get_join(self, name):
        """Return the join called name, raising KeyError(name) if absent."""
        for join in self.joins:
            if join.name == name:
                return join
        raise KeyError(name)

    def get_measure(self, name):
        """Return the measure called name, raising KeyError(name) if absent."""
        for measure in self.measures:
            if measure.name == name:
                return measure
        raise KeyError(name)
class Warehouse(Base):
    """Schema object holding a list of cubes plus ``pg_dsn`` and ``search_path``."""
    __slots__ = ['name', 'label', 'pg_dsn', 'search_path', 'cubes']
    # 'search_path' used to be listed twice in this literal; the duplicate
    # key had no effect and has been dropped.
    __types__ = {
        'name': str,
        'label': unicode,
        'pg_dsn': str,
        'search_path': [str],
        'cubes': [Cube],
    }

    def check(self):
        """Raise SchemaError if two cubes share the same name."""
        names = collections.Counter(cube.name for cube in self.cubes)
        # items() works on Python 2 and 3 (iteritems() is Python 2 only);
        # sorting keeps the error message stable.
        duplicates = sorted(k for k, v in names.items() if v > 1)
        if duplicates:
            raise SchemaError('More than one cube with name(s) %s' % ', '.join(duplicates))

    def get_cube(self, name):
        """Return the cube called name, raising KeyError(name) if absent."""
        for cube in self.cubes:
            if cube.name == name:
                return cube
        raise KeyError(name)