diff --git a/tests/conftest.py b/tests/conftest.py index 6d62535..64ce440 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -14,7 +14,6 @@ import pytest import utils - Wcs = namedtuple('Wcs', ['url', 'appdir', 'pid']) @@ -101,7 +100,9 @@ formdef.fields = [ items=['foo', 'bar', 'baz'], varname='item'), fields.BoolField(id='3', label='3rd field', type='bool', varname='bool'), fields.ItemField(id='4', label='4rth field', type='item', varname='itemOpen'), - fields.StringField(id='5', label='5th field', type='string', anonymise=False, varname='stringCaseSensitiveé'), + fields.StringField(id='5', label='5th field', type='string', anonymise=False, varname='stringCaseSensitive-é'), + fields.BoolField(id='6', label='6th field duplicate', type='bool', varname='duplicate'), + fields.StringField(id='7', label='7th field duplicate', type='string', anonymise=False, varname='duplicate'), ] formdef.store() diff --git a/tests/olap.model b/tests/olap.model index 553f485..8d4dc1e 100644 --- a/tests/olap.model +++ b/tests/olap.model @@ -279,9 +279,9 @@ { "filter": true, "label": "5th field", - "name": "stringCaseSensitiveé", + "name": "stringCaseSensitive-é", "type": "string", - "value": "\"field_stringCaseSensitiveé\"" + "value": "\"field_stringCaseSensitive-é\"" } ], "fact_table" : "\"formdata_demande\"", @@ -395,7 +395,11 @@ "type" : "point" } ], - "name" : "formdata_demande" + "name" : "formdata_demande", + "warnings": [ + "le champ « 6th field duplicate » a un nom de variable dupliqué « duplicate »", + "le champ « 7th field duplicate » a un nom de variable dupliqué « duplicate »" + ] } ], "label" : "olap", diff --git a/tests/test_wcs.py b/tests/test_wcs.py index 4e82e19..a636dce 100644 --- a/tests/test_wcs.py +++ b/tests/test_wcs.py @@ -60,7 +60,7 @@ def test_wcs_fixture(wcs, postgres_db, tmpdir, olap_cmd, caplog): ('formdata_demande', 'field_item'), ('formdata_demande', 'field_bool'), ('formdata_demande', 'field_itemOpen'), - ('formdata_demande', 
'field_stringCaseSensitive\xe9'), + ('formdata_demande', 'field_stringCaseSensitive-\xe9'), ('formdata_demande', 'function__receiver'), ('formdata_demande_field_item', 'id'), ('formdata_demande_field_item', 'label'), diff --git a/wcs_olap/feeder.py b/wcs_olap/feeder.py index dc947ce..614b574 100644 --- a/wcs_olap/feeder.py +++ b/wcs_olap/feeder.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals +from collections import OrderedDict, Counter import datetime import six import copy @@ -610,25 +611,11 @@ class WcsFormdefFeeder(object): self.formdef.schema.name]) self.formdef_sql_id = self.cur.fetchone()[0] - columns = [['status_id', 'smallint REFERENCES {status_table} (id)']] - - comments = {} - - # compute list of fields - fields = self.formdef.schema.fields - if self.formdef.schema.workflow: - fields += self.formdef.schema.workflow.fields + columns = OrderedDict() + columns['status_id'] = {'sql_col_name': 'status_id', 'sql_col_def': 'smallint REFERENCES {status_table} (id)'} # add item fields - already_seen_varnames = set() - for field in fields: - if field.anonymise is True: - continue - if not field.varname or '-' in field.varname: - continue - if field.varname in already_seen_varnames: - continue - already_seen_varnames.add(field.varname) + for field in self.good_fields.values(): if field.type == 'item': comment = ('valeurs du champ « %s » du formulaire %s' % (field.label, self.formdef.schema.name)) @@ -647,37 +634,55 @@ class WcsFormdefFeeder(object): field_def = 'varchar' else: continue - self.fields.append(field) - at = 'field_%s' % field.varname - columns.append([at, field_def]) - comments[at] = field.label + columns[field.varname] = { + 'field': field, + 'sql_col_name': 'field_%s' % field.varname, + 'sql_col_def': field_def, + 'sql_comment': field.label, + } + + # keep loaded fields around + for key in columns: + if columns[key].get('field') is not None: + self.fields.append(columns[key].get('field')) # add geolocation fields for geolocation, label in 
self.formdef.schema.geolocations: at = 'geolocation_%s' % geolocation - columns.append([at, 'point']) - comments[at] = label + columns[at] = { + 'sql_col_name': at, + 'sql_col_def': 'point', + 'sql_comment': 'géoloc « %s »' % label, + } # add function fields for function, name in self.formdef.schema.workflow.functions.iteritems(): at = 'function_%s' % slugify(function) - columns.append([at, 'smallint REFERENCES {role_table} (id)']) - comments[at] = 'fonction « %s »' % name + columns[at] = { + 'sql_col_name': at, + 'sql_col_def': 'smallint REFERENCES {role_table} (id)', + 'sql_comment': 'fonction « %s »' % name, + } - self.columns = ([name for name, _type in self.olap_feeder.columns] + [ - name for name, _type in columns]) + self.columns = [name for name, _type in self.olap_feeder.columns] + for key in columns: + self.columns.append(columns[key]['sql_col_name']) self.columns.remove('geolocation_base') - self.create_table(self.table_name, columns, inherits='{generic_formdata_table}', + self.create_table(self.table_name, + [(columns[key]['sql_col_name'], columns[key]['sql_col_def']) for key in columns], + inherits='{generic_formdata_table}', comment='formulaire %s' % self.formdef.schema.name) - for at, comment in comments.iteritems(): - self.ex('COMMENT ON COLUMN {formdata_table}.%s IS %%s' % quote(at), vars=(comment,)) + for key in columns: + column = columns[key] + if column.get('sql_comment'): + self.ex('COMMENT ON COLUMN {formdata_table}.%s IS %%s' % quote(column['sql_col_name']), + vars=(column['sql_comment'],)) # Creat index for JSON fields if self.has_jsonb: - for field in fields: - if field.varname and '-' not in field.varname: - self.create_formdata_json_index(self.table_name, field.varname) + for varname in self.good_fields: + self.create_formdata_json_index(self.table_name, varname) # PostgreSQL does not propagate foreign key constraints to child tables # so we must recreate them manually @@ -900,6 +905,11 @@ class WcsFormdefFeeder(object): # create cube cube = 
self.cube = copy.deepcopy(self.base_cube) + + def add_warning(message): + self.logger.warning('%s', message) + cube.setdefault('warnings', []).append(message) + # remove json field from formdef cubes cube.pop('json_field', None) cube.update({ @@ -950,13 +960,32 @@ class WcsFormdefFeeder(object): fields = self.formdef.schema.fields if self.formdef.schema.workflow: fields += self.formdef.schema.workflow.fields + + # filter duplicates + duplicate_varnames = set() + self.good_fields = good_fields = OrderedDict() for field in fields: + if field.type not in ('item', 'bool', 'string'): + continue if field.anonymise is True: continue if not field.varname: + add_warning('le champ « %s » n\'a pas de nom de variable, il a été ignoré' % field.label) continue - if '-' in field.varname: + if field.varname in good_fields: + # duplicate found + duplicate_varnames.add(field.varname) + add_warning('le champ « %(label)s » a un nom de variable dupliqué « %(varname)s »' % { + 'label': good_fields[field.varname].label, + 'varname': field.varname + }) + del self.good_fields[field.varname] + if field.varname in duplicate_varnames: + add_warning('le champ « %(label)s » a un nom de variable dupliqué « %(varname)s »' % field.__dict__) continue + self.good_fields[field.varname] = field + + for field in good_fields.values(): join = None if field.type == 'item':