use only unicode strings (#26207)

This commit is contained in:
Benjamin Dauvergne 2019-09-30 17:04:20 +02:00
parent dee2d6ab9e
commit 4fc741c9e0
4 changed files with 53 additions and 43 deletions

View File

@ -101,7 +101,7 @@ formdef.fields = [
items=['foo', 'bar', 'baz'], varname='item'),
fields.BoolField(id='3', label='3rd field', type='bool', varname='bool'),
fields.ItemField(id='4', label='4rth field', type='item', varname='itemOpen'),
fields.StringField(id='5', label='5th field', type='string', anonymise=False, varname='stringCaseSensitive'),
fields.StringField(id='5', label='5th field', type='string', anonymise=False, varname='stringCaseSensitiveé'),
]
formdef.store()
@ -111,7 +111,7 @@ for i in range(50):
formdata = formdef.data_class()()
formdata.just_created()
formdata.receipt_time = datetime.datetime(2018, random.randrange(1, 13), random.randrange(1, 29)).timetuple()
formdata.data = {'1': 'FOO BAR %d' % i}
formdata.data = {'1': 'FOO BAR é %d' % i}
if i%4 == 0:
formdata.data['2'] = 'foo'
formdata.data['2_display'] = 'foo'

View File

@ -279,9 +279,9 @@
{
"filter": true,
"label": "5th field",
"name": "stringCaseSensitive",
"name": "stringCaseSensitiveé",
"type": "string",
"value": "\"field_stringCaseSensitive\""
"value": "\"field_stringCaseSensitiveé\""
}
],
"fact_table" : "\"formdata_demande\"",

View File

@ -1,3 +1,5 @@
from __future__ import unicode_literals
import json
import pytest
@ -58,7 +60,7 @@ def test_wcs_fixture(wcs, postgres_db, tmpdir, olap_cmd, caplog):
('formdata_demande', 'field_item'),
('formdata_demande', 'field_bool'),
('formdata_demande', 'field_itemOpen'),
('formdata_demande', 'field_stringCaseSensitive'),
('formdata_demande', 'field_stringCaseSensitive\xe9'),
('formdata_demande', 'function__receiver'),
('formdata_demande_field_item', 'id'),
('formdata_demande_field_item', 'label'),

View File

@ -1,5 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import datetime
import six
import copy
@ -13,6 +15,9 @@ import psycopg2
from cached_property import cached_property
from wcs_olap.wcs_api import WcsApiError
psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY)
def quote(name):
    """Return ``name`` wrapped in double quotes as an SQL quoted identifier.

    The result is meant to be interpolated directly into SQL text (column
    and table names cannot be passed as bound query parameters), so any
    double quote embedded in ``name`` is doubled; otherwise it would
    terminate the identifier early and corrupt the statement.

    ``name`` is formatted with ``%s`` first, so non-string values are
    accepted exactly as before. Names without a double quote produce the
    same output as the previous implementation.
    """
    identifier = '%s' % name
    return '"%s"' % identifier.replace('"', '""')
@ -53,13 +58,13 @@ class Context(object):
class WcsOlapFeeder(object):
channels = [
[1, 'web', u'web'],
[2, 'mail', u'courrier'],
[3, 'phone', u'téléphone'],
[4, 'counter', u'guichet'],
[5, 'backoffice', u'backoffice'],
[6, 'email', u'email'],
[7, 'fax', u'fax'],
[1, 'web', 'web'],
[2, 'mail', 'courrier'],
[3, 'phone', 'téléphone'],
[4, 'counter', 'guichet'],
[5, 'backoffice', 'backoffice'],
[6, 'email', 'email'],
[7, 'fax', 'fax'],
]
channel_to_id = dict((c[1], c[0]) for c in channels)
id_to_channel = dict((c[0], c[1]) for c in channels)
@ -106,7 +111,7 @@ class WcsOlapFeeder(object):
}
cube = {
'name': 'all_formdata',
'label': u'Tous les formulaires',
'label': 'Tous les formulaires',
'fact_table': 'formdata',
'key': 'id',
'joins': [
@ -267,7 +272,6 @@ class WcsOlapFeeder(object):
self.connection = psycopg2.connect(dsn=pg_dsn)
self.connection.autocommit = True
self.cur = self.connection.cursor()
psycopg2.extensions.register_type(psycopg2.extensions.UNICODE, self.cur)
try:
self.has_jsonb = self.detect_jsonb()
@ -298,7 +302,7 @@ class WcsOlapFeeder(object):
if len(table_name) < 64 and not force_hash:
return table_name
else:
return table_name[:63-hash_length] + hashlib.md5(table_name).hexdigest()[:hash_length]
return table_name[:63 - hash_length] + hashlib.md5(table_name.encode('utf-8')).hexdigest()[:hash_length]
@property
def default_ctx(self):
@ -444,30 +448,30 @@ CREATE TABLE public.dates AS (SELECT
def do_base_table(self):
# channels
self.create_labeled_table('channel', [[c[0], c[2]] for c in self.channels],
comment=u'canal')
comment='canal')
# roles
roles = dict((i, role.name) for i, role in enumerate(self.roles))
tmp_role_map = self.create_labeled_table('role', roles.items(), comment=u'role')
tmp_role_map = self.create_labeled_table('role', roles.items(), comment='role')
self.role_mapping = dict(
(role.id, tmp_role_map[role.name]) for role in self.roles)
# categories
tmp_cat_map = self.create_labeled_table(
'category', enumerate(c.name for c in self.categories), comment=u'catégorie')
'category', enumerate(c.name for c in self.categories), comment='catégorie')
self.categories_mapping = dict((c.id, tmp_cat_map[c.name]) for c in self.categories)
self.create_labeled_table('hour', zip(range(0, 24), map(str, range(0, 24))),
comment=u'heures')
comment='heures')
self.create_labeled_table('status', self.status,
comment=u'statuts simplifiés')
comment='statuts simplifiés')
self.ex('CREATE TABLE {form_table} (id serial PRIMARY KEY,'
' category_id integer REFERENCES {category_table} (id),'
' label varchar)')
self.ex('COMMENT ON TABLE {form_table} IS %s', vars=(u'types de formulaire',))
self.ex('COMMENT ON TABLE {form_table} IS %s', vars=('types de formulaire',))
# agents
self.create_labeled_table_serial('agent', comment=u'agents')
self.create_labeled_table_serial('agent', comment='agents')
self.columns = [
['id', 'serial primary key'],
@ -484,19 +488,19 @@ CREATE TABLE public.dates AS (SELECT
if self.has_jsonb:
self.columns.append(['json_data', 'JSONB NULL'])
self.comments = {
'formdef_id': u'formulaire',
'receipt_time': u'date de réception',
'hour_id': u'heure',
'channel_id': u'canal',
'backoffice': u'soumission backoffce',
'generic_status_id': u'statut simplifié',
'endpoint_delay': u'délai de traitement',
'geolocation_base': u'position géographique',
'formdef_id': 'formulaire',
'receipt_time': 'date de réception',
'hour_id': 'heure',
'channel_id': 'canal',
'backoffice': 'soumission backoffce',
'generic_status_id': 'statut simplifié',
'endpoint_delay': 'délai de traitement',
'geolocation_base': 'position géographique',
}
self.create_table('{generic_formdata_table}', self.columns)
for at, comment in self.comments.iteritems():
self.ex('COMMENT ON COLUMN {generic_formdata_table}.%s IS %%s' % at, vars=(comment,))
self.ex('COMMENT ON TABLE {generic_formdata_table} IS %s', vars=(u'tous les formulaires',))
self.ex('COMMENT ON TABLE {generic_formdata_table} IS %s', vars=('tous les formulaires',))
# evolutions
self.create_table('{generic_evolution_table}', [
['id', 'serial primary key'],
@ -506,7 +510,7 @@ CREATE TABLE public.dates AS (SELECT
['date', 'date'],
['hour_id', 'smallint REFERENCES {hour_table} (id)'],
])
self.ex('COMMENT ON TABLE {generic_evolution_table} IS %s', vars=(u'evolution générique',))
self.ex('COMMENT ON TABLE {generic_evolution_table} IS %s', vars=('evolution générique',))
def feed(self):
try:
@ -522,7 +526,7 @@ CREATE TABLE public.dates AS (SELECT
formdef_feeder = WcsFormdefFeeder(self, formdef, do_feed=self.do_feed)
formdef_feeder.feed()
except WcsApiError as e:
self.logger.error(u'failed to retrieve formdef %s, %s', formdef.slug, e)
self.logger.error('failed to retrieve formdef %s, %s', formdef.slug, e)
if 'cubes_model_dirs' in self.config:
model_path = os.path.join(self.config['cubes_model_dirs'], '%s.model' % self.schema)
with open(model_path, 'w') as f:
@ -597,7 +601,7 @@ class WcsFormdefFeeder(object):
statuses = self.formdef.schema.workflow.statuses
tmp_status_map = self.olap_feeder.create_labeled_table(
self.status_table_name, enumerate([s.name for s in statuses]),
comment=u'statuts du formulaire « %s »' % self.formdef.schema.name)
comment='statuts du formulaire « %s »' % self.formdef.schema.name)
self.status_mapping = dict((s.id, tmp_status_map[s.name]) for s in statuses)
def do_data_table(self):
@ -626,7 +630,7 @@ class WcsFormdefFeeder(object):
continue
already_seen_varnames.add(field.varname)
if field.type == 'item':
comment = (u'valeurs du champ « %s » du formulaire %s'
comment = ('valeurs du champ « %s » du formulaire %s'
% (field.label, self.formdef.schema.name))
table_name = self.hash_table_name('%s_field_%s' % (self.table_name, field.varname))
# create table and mapping
@ -658,14 +662,14 @@ class WcsFormdefFeeder(object):
for function, name in self.formdef.schema.workflow.functions.iteritems():
at = 'function_%s' % slugify(function)
columns.append([at, 'smallint REFERENCES {role_table} (id)'])
comments[at] = u'fonction « %s »' % name
comments[at] = 'fonction « %s »' % name
self.columns = ([name for name, _type in self.olap_feeder.columns] + [
name for name, _type in columns])
self.columns.remove('geolocation_base')
self.create_table(self.table_name, columns, inherits='{generic_formdata_table}',
comment=u'formulaire %s' % self.formdef.schema.name)
comment='formulaire %s' % self.formdef.schema.name)
for at, comment in comments.iteritems():
self.ex('COMMENT ON COLUMN {formdata_table}.%s IS %%s' % quote(at), vars=(comment,))
@ -694,7 +698,7 @@ class WcsFormdefFeeder(object):
['hour_id', 'smallint REFERENCES {hour_table} (id)'],
])
self.ex('COMMENT ON TABLE {evolution_table} IS %s',
vars=(u'evolution des demandes %s' % self.formdef.schema.name,))
vars=('evolution des demandes %s' % self.formdef.schema.name,))
def insert_item_value(self, field, value):
table_name = self.hash_table_name('%s_field_%s' % (self.table_name, field.varname))
@ -812,9 +816,7 @@ class WcsFormdefFeeder(object):
at = 'function_%s' % slugify(function)
row[at] = v
tpl = '(' + ', '.join(['%s'] * len(self.columns[1:])) + ')'
value = self.cur.mogrify(tpl, [row[column] for column in self.columns[1:]])
values.append(value)
values.append(tuple(row[column] for column in self.columns[1:]))
# insert evolutions
generic_evolution = []
evolution = []
@ -842,8 +844,14 @@ class WcsFormdefFeeder(object):
if not values:
self.logger.warning('no data')
return
insert_columns = ['%s' % quote(column) for column in self.columns[1:]]
insert_columns = ', '.join(insert_columns)
self.ex('INSERT INTO {formdata_table} ({columns}) VALUES {values} RETURNING id',
ctx=dict(columns=', '.join(['%s' % quote(column) for column in self.columns[1:]]), values=', '.join(values)))
ctx=dict(
columns=insert_columns,
values=', '.join(['%s'] * len(values))
),
vars=values)
# insert generic evolutions
generic_evolutions = []
@ -930,7 +938,7 @@ class WcsFormdefFeeder(object):
})
cube['dimensions'].append({
'name': at,
'label': u'fonction %s' % name.lower(),
'label': 'fonction %s' % name.lower(),
'join': [at],
'type': 'integer',
'value': '%s.id' % quote(at),