misc: accept duplicate fields with the same type (#42429)

This commit is contained in:
Benjamin Dauvergne 2020-08-31 10:46:13 +02:00
parent 326f53e84a
commit a16fb4ee98
4 changed files with 64 additions and 15 deletions

View File

@ -101,10 +101,16 @@ formdef.fields = [
fields.BoolField(id='3', label='3rd field', type='bool', varname='bool'),
fields.ItemField(id='4', label='4rth field', type='item', varname='itemOpen'),
fields.StringField(id='5', label='5th field', type='string', anonymise=False, varname='stringCaseSensitive-é'),
fields.BoolField(id='6', label='6th field duplicate', type='bool', varname='duplicate'),
fields.StringField(id='7', label='7th field duplicate', type='string', anonymise=False, varname='duplicate'),
fields.BoolField(id='6', label='6th field bad duplicate', type='bool', varname='duplicate'),
fields.StringField(id='7', label='7th field bad duplicate', type='string', anonymise=False, varname='duplicate'),
fields.StringField(id='8', label='8th field integer', type='string', anonymise=False, varname='integer',
validation={'type': 'digits'}),
fields.StringField(id='9', label='9th field good duplicate', type='string', anonymise=False,
required=False, varname='good_duplicate'),
fields.StringField(id='10', label='10th field good duplicate', type='string', anonymise=False,
required=False, varname='good_duplicate'),
fields.StringField(id='11', label='11th field third bad duplicate', type='string',
anonymise=False, varname='duplicate'),
]
formdef.store()
@ -124,16 +130,20 @@ for i in range(50):
formdata.data['2_display'] = 'foo'
formdata.data['4'] = 'open_one'
formdata.data['4_display'] = 'open_one'
formdata.data['9'] = 'a'
elif i%4 == 1:
formdata.data['2'] = 'bar'
formdata.data['2_display'] = 'bar'
formdata.data['4'] = 'open_two'
formdata.data['4_display'] = 'open_two'
formdata.data['10'] = 'b'
else:
formdata.data['2'] = 'baz'
formdata.data['2_display'] = 'baz'
formdata.data['4'] = "open'three"
formdata.data['4_display'] = "open'three"
formdata.data['9'] = 'a'
formdata.data['10'] = 'b'
formdata.data['3'] = bool(i % 2)
formdata.data['8'] = str(i % 10)

View File

@ -287,6 +287,13 @@
"name": "stringCaseSensitive-é",
"type": "string",
"value": "\"field_stringCaseSensitive-é\""
},
{
"filter": true,
"label": "10th field good duplicate",
"name": "good_duplicate",
"type": "string",
"value": "\"field_good_duplicate\""
}
],
"fact_table" : "\"formdata_demande\"",
@ -419,8 +426,8 @@
],
"name" : "formdata_demande",
"warnings": [
"le champ « 6th field duplicate » a un nom de variable dupliqué « duplicate »",
"le champ « 7th field duplicate » a un nom de variable dupliqué « duplicate »"
"Le champ « 7th field bad duplicate » a un nom de variable dupliqué « duplicate » mais pas le même type que « 6th field bad duplicate », tous les champs avec ce nom seront ignorés (string != bool).",
"Le champ « 11th field third bad duplicate » est un doublon d'un champ de type différent, il a été ignoré."
]
}
],

View File

@ -54,6 +54,14 @@ def test_wcs_fixture(wcs, postgres_db, tmpdir, olap_cmd, caplog):
for url_with_limit in url_with_limits:
assert 'limit=500&' in url_with_limit
assert (
'Le champ « 7th field bad duplicate » a un nom de variable dupliqué '
'« duplicate » mais pas le même type que « 6th field bad duplicate », '
'tous les champs avec ce nom seront ignorés (string != bool).') in caplog.text
assert (
'Le champ « 11th field third bad duplicate » est un doublon d\'un '
'champ de type différent, il a été ignoré.') in caplog.text
expected_schema = [
('agent', 'id'),
('agent', 'label'),
@ -103,6 +111,7 @@ def test_wcs_fixture(wcs, postgres_db, tmpdir, olap_cmd, caplog):
('formdata_demande', 'field_itemOpen'),
('formdata_demande', 'field_stringCaseSensitive-\xe9'),
('formdata_demande', 'field_integer'),
('formdata_demande', 'field_good_duplicate'),
('formdata_demande', 'function__receiver'),
('formdata_demande_field_item', 'id'),
('formdata_demande_field_item', 'label'),
@ -167,6 +176,19 @@ FULL OUTER JOIN public.dates AS dates ON series.date = dates.date WHERE dates.da
expected_json_schema['pg_dsn'] = postgres_db.dsn
assert json_schema == expected_json_schema
# verify data in duplicated columns
with postgres_db.conn() as conn:
with conn.cursor() as c:
c.execute('SET search_path = olap')
c.execute('SELECT field_good_duplicate, count(id) '
'FROM formdata_demande '
'GROUP BY field_good_duplicate '
'ORDER BY field_good_duplicate')
assert dict(c.fetchall()) == {
'a': 37,
'b': 13,
}
def test_requests_exception(wcs, postgres_db, tmpdir, olap_cmd, caplog):
@httmock.urlmatch()
@ -189,6 +211,7 @@ def test_requests_not_ok(wcs, postgres_db, tmpdir, olap_cmd, caplog):
olap_cmd(no_log_errors=False)
assert 'invalid signature' in caplog.text
def test_requests_not_json(wcs, postgres_db, tmpdir, olap_cmd, caplog):
@httmock.urlmatch()
def return_invalid_json(url, request):

View File

@ -1023,26 +1023,35 @@ class WcsFormdefFeeder(object):
fields += self.formdef.schema.workflow.fields
# filter duplicates
duplicate_varnames = set()
self.good_fields = good_fields = OrderedDict()
bad_varnames = set()
for field in fields:
if field.type not in ('item', 'bool', 'string'):
continue
if field.anonymise is True:
continue
if not field.varname:
add_warning('le champ « %s » n\' a pas de nom de variable, il a été ignoré' % field.label)
add_warning('Le champ « %s » n\' a pas de nom de variable, il a été ignoré.' % field.label)
continue
if field.varname in good_fields:
# duplicate found
duplicate_varnames.add(field.varname)
add_warning('le champ « %(label)s » a un nom de variable dupliqué « %(varname)s »' % {
'label': good_fields[field.varname].label,
'varname': field.varname
})
if field.varname in good_fields and good_fields[field.varname].type != field.type:
# duplicate found, but type is not coherent
add_warning(
'Le champ « %(label)s » a un nom de variable dupliqué « %(varname)s » '
'mais pas le même type que « %(label_good)s », tous les champs avec ce nom seront ignorés '
'(%(type)s != %(type_good)s).' % {
'label': field.label,
'varname': field.varname,
'type': field.type,
'label_good': good_fields[field.varname].label,
'type_good': good_fields[field.varname].type,
}
)
del self.good_fields[field.varname]
if field.varname in duplicate_varnames:
add_warning('le champ « %(label)s » a un nom de variable dupliqué « %(varname)s »' % field.__dict__)
bad_varnames.add(field.varname)
continue
if field.varname in bad_varnames:
add_warning('Le champ « %s » est un doublon d\'un champ de type différent, il a été ignoré.'
% field.label)
continue
self.good_fields[field.varname] = field