ignore fields with duplicated varname (#26207)

This commit is contained in:
Benjamin Dauvergne 2019-09-30 17:05:21 +02:00
parent 0c78cf8731
commit 32ff0b75b5
4 changed files with 74 additions and 40 deletions

View File

@ -14,7 +14,6 @@ import pytest
import utils
Wcs = namedtuple('Wcs', ['url', 'appdir', 'pid'])
@ -101,7 +100,9 @@ formdef.fields = [
items=['foo', 'bar', 'baz'], varname='item'),
fields.BoolField(id='3', label='3rd field', type='bool', varname='bool'),
fields.ItemField(id='4', label='4rth field', type='item', varname='itemOpen'),
fields.StringField(id='5', label='5th field', type='string', anonymise=False, varname='stringCaseSensitiveé'),
fields.StringField(id='5', label='5th field', type='string', anonymise=False, varname='stringCaseSensitive-é'),
fields.BoolField(id='6', label='6th field duplicate', type='bool', varname='duplicate'),
fields.StringField(id='7', label='7th field duplicate', type='string', anonymise=False, varname='duplicate'),
]
formdef.store()

View File

@ -279,9 +279,9 @@
{
"filter": true,
"label": "5th field",
"name": "stringCaseSensitiveé",
"name": "stringCaseSensitive-é",
"type": "string",
"value": "\"field_stringCaseSensitiveé\""
"value": "\"field_stringCaseSensitive-é\""
}
],
"fact_table" : "\"formdata_demande\"",
@ -395,7 +395,11 @@
"type" : "point"
}
],
"name" : "formdata_demande"
"name" : "formdata_demande",
"warnings": [
"le champ « 6th field duplicate » a un nom de variable dupliqué « duplicate »",
"le champ « 7th field duplicate » a un nom de variable dupliqué « duplicate »"
]
}
],
"label" : "olap",

View File

@ -60,7 +60,7 @@ def test_wcs_fixture(wcs, postgres_db, tmpdir, olap_cmd, caplog):
('formdata_demande', 'field_item'),
('formdata_demande', 'field_bool'),
('formdata_demande', 'field_itemOpen'),
('formdata_demande', 'field_stringCaseSensitive\xe9'),
('formdata_demande', 'field_stringCaseSensitive-\xe9'),
('formdata_demande', 'function__receiver'),
('formdata_demande_field_item', 'id'),
('formdata_demande_field_item', 'label'),

View File

@ -2,6 +2,7 @@
from __future__ import unicode_literals
from collections import OrderedDict, Counter
import datetime
import six
import copy
@ -610,25 +611,11 @@ class WcsFormdefFeeder(object):
self.formdef.schema.name])
self.formdef_sql_id = self.cur.fetchone()[0]
columns = [['status_id', 'smallint REFERENCES {status_table} (id)']]
comments = {}
# compute list of fields
fields = self.formdef.schema.fields
if self.formdef.schema.workflow:
fields += self.formdef.schema.workflow.fields
columns = OrderedDict()
columns['status_id'] = {'sql_col_name': 'status_id', 'sql_col_def': 'smallint REFERENCES {status_table} (id)'}
# add item fields
already_seen_varnames = set()
for field in fields:
if field.anonymise is True:
continue
if not field.varname or '-' in field.varname:
continue
if field.varname in already_seen_varnames:
continue
already_seen_varnames.add(field.varname)
for field in self.good_fields.values():
if field.type == 'item':
comment = ('valeurs du champ « %s » du formulaire %s'
% (field.label, self.formdef.schema.name))
@ -647,37 +634,55 @@ class WcsFormdefFeeder(object):
field_def = 'varchar'
else:
continue
self.fields.append(field)
at = 'field_%s' % field.varname
columns.append([at, field_def])
comments[at] = field.label
columns[field.varname] = {
'field': field,
'sql_col_name': 'field_%s' % field.varname,
'sql_col_def': field_def,
'sql_comment': field.label,
}
# keep loaded fields around
for key in columns:
if columns[key].get('field') is not None:
self.fields.append(columns[key].get('field'))
# add geolocation fields
for geolocation, label in self.formdef.schema.geolocations:
at = 'geolocation_%s' % geolocation
columns.append([at, 'point'])
comments[at] = label
columns[at] = {
'sql_col_name': at,
'sql_col_def': 'point',
'comment': 'géoloc « %s »' % label,
}
# add function fields
for function, name in self.formdef.schema.workflow.functions.iteritems():
at = 'function_%s' % slugify(function)
columns.append([at, 'smallint REFERENCES {role_table} (id)'])
comments[at] = 'fonction « %s »' % name
columns[at] = {
'sql_col_name': at,
'sql_col_def': 'smallint REFERENCES {role_table} (id)',
'comment': 'fonction « %s »' % name,
}
self.columns = ([name for name, _type in self.olap_feeder.columns] + [
name for name, _type in columns])
self.columns = [name for name, _type in self.olap_feeder.columns]
for key in columns:
self.columns.append(columns[key]['sql_col_name'])
self.columns.remove('geolocation_base')
self.create_table(self.table_name, columns, inherits='{generic_formdata_table}',
self.create_table(self.table_name,
[(columns[key]['sql_col_name'], columns[key]['sql_col_def']) for key in columns],
inherits='{generic_formdata_table}',
comment='formulaire %s' % self.formdef.schema.name)
for at, comment in comments.iteritems():
self.ex('COMMENT ON COLUMN {formdata_table}.%s IS %%s' % quote(at), vars=(comment,))
for key in columns:
column = columns[key]
if column.get('sql_comment'):
self.ex('COMMENT ON COLUMN {formdata_table}.%s IS %%s' % quote(column['sql_col_name']),
vars=(column['sql_comment'],))
# Create index for JSON fields
if self.has_jsonb:
for field in fields:
if field.varname and '-' not in field.varname:
self.create_formdata_json_index(self.table_name, field.varname)
for varname in self.good_fields:
self.create_formdata_json_index(self.table_name, varname)
# PostgreSQL does not propagate foreign key constraints to child tables
# so we must recreate them manually
@ -900,6 +905,11 @@ class WcsFormdefFeeder(object):
# create cube
cube = self.cube = copy.deepcopy(self.base_cube)
# Record a warning message: log it at warning level through self.logger and
# append it to the cube's 'warnings' list, creating that list on first use.
def add_warning(message):
self.logger.warning('%s', message)
cube.setdefault('warnings', []).append(message)
# remove json field from formdef cubes
cube.pop('json_field', None)
cube.update({
@ -950,13 +960,32 @@ class WcsFormdefFeeder(object):
fields = self.formdef.schema.fields
if self.formdef.schema.workflow:
fields += self.formdef.schema.workflow.fields
# filter duplicates
duplicate_varnames = set()
self.good_fields = good_fields = OrderedDict()
for field in fields:
if field.type not in ('item', 'bool', 'string'):
continue
if field.anonymise is True:
continue
if not field.varname:
add_warning('le champ « %s » n\' a pas de nom de variable, il a été ignoré' % field.label)
continue
if '-' in field.varname:
if field.varname in good_fields:
# duplicate found
duplicate_varnames.add(field.varname)
add_warning('le champ « %(label)s » a un nom de variable dupliqué « %(varname)s »' % {
'label': good_fields[field.varname].label,
'varname': field.varname
})
del self.good_fields[field.varname]
if field.varname in duplicate_varnames:
add_warning('le champ « %(label)s » a un nom de variable dupliqué « %(varname)s »' % field.__dict__)
continue
self.good_fields[field.varname] = field
for field in good_fields.values():
join = None
if field.type == 'item':