From 4fc741c9e0d135866b31193be336f141658c9170 Mon Sep 17 00:00:00 2001 From: Benjamin Dauvergne Date: Mon, 30 Sep 2019 17:04:20 +0200 Subject: [PATCH] use only unicode strings (#26207) --- tests/conftest.py | 4 +-- tests/olap.model | 4 +-- tests/test_wcs.py | 4 ++- wcs_olap/feeder.py | 84 +++++++++++++++++++++++++--------------------- 4 files changed, 53 insertions(+), 43 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index e78674b..6d62535 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -101,7 +101,7 @@ formdef.fields = [ items=['foo', 'bar', 'baz'], varname='item'), fields.BoolField(id='3', label='3rd field', type='bool', varname='bool'), fields.ItemField(id='4', label='4rth field', type='item', varname='itemOpen'), - fields.StringField(id='5', label='5th field', type='string', anonymise=False, varname='stringCaseSensitive'), + fields.StringField(id='5', label='5th field', type='string', anonymise=False, varname='stringCaseSensitiveé'), ] formdef.store() @@ -111,7 +111,7 @@ for i in range(50): formdata = formdef.data_class()() formdata.just_created() formdata.receipt_time = datetime.datetime(2018, random.randrange(1, 13), random.randrange(1, 29)).timetuple() - formdata.data = {'1': 'FOO BAR %d' % i} + formdata.data = {'1': 'FOO BAR é %d' % i} if i%4 == 0: formdata.data['2'] = 'foo' formdata.data['2_display'] = 'foo' diff --git a/tests/olap.model b/tests/olap.model index 61e18db..553f485 100644 --- a/tests/olap.model +++ b/tests/olap.model @@ -279,9 +279,9 @@ { "filter": true, "label": "5th field", - "name": "stringCaseSensitive", + "name": "stringCaseSensitiveé", "type": "string", - "value": "\"field_stringCaseSensitive\"" + "value": "\"field_stringCaseSensitiveé\"" } ], "fact_table" : "\"formdata_demande\"", diff --git a/tests/test_wcs.py b/tests/test_wcs.py index aabd262..4e82e19 100644 --- a/tests/test_wcs.py +++ b/tests/test_wcs.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import json import pytest @@ -58,7 +60,7 @@ def test_wcs_fixture(wcs, postgres_db, tmpdir, olap_cmd, caplog): ('formdata_demande', 'field_item'), ('formdata_demande', 'field_bool'), ('formdata_demande', 'field_itemOpen'), - ('formdata_demande', 'field_stringCaseSensitive'), + ('formdata_demande', 'field_stringCaseSensitive\xe9'), ('formdata_demande', 'function__receiver'), ('formdata_demande_field_item', 'id'), ('formdata_demande_field_item', 'label'), diff --git a/wcs_olap/feeder.py b/wcs_olap/feeder.py index fa70dac..dc947ce 100644 --- a/wcs_olap/feeder.py +++ b/wcs_olap/feeder.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- +from __future__ import unicode_literals + import datetime import six import copy @@ -13,6 +15,9 @@ import psycopg2 from cached_property import cached_property from wcs_olap.wcs_api import WcsApiError +psycopg2.extensions.register_type(psycopg2.extensions.UNICODE) +psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY) + def quote(name): return '"%s"' % name @@ -53,13 +58,13 @@ class Context(object): class WcsOlapFeeder(object): channels = [ - [1, 'web', u'web'], - [2, 'mail', u'courrier'], - [3, 'phone', u'téléphone'], - [4, 'counter', u'guichet'], - [5, 'backoffice', u'backoffice'], - [6, 'email', u'email'], - [7, 'fax', u'fax'], + [1, 'web', 'web'], + [2, 'mail', 'courrier'], + [3, 'phone', 'téléphone'], + [4, 'counter', 'guichet'], + [5, 'backoffice', 'backoffice'], + [6, 'email', 'email'], + [7, 'fax', 'fax'], ] channel_to_id = dict((c[1], c[0]) for c in channels) id_to_channel = dict((c[0], c[1]) for c in channels) @@ -106,7 +111,7 @@ class WcsOlapFeeder(object): } cube = { 'name': 'all_formdata', - 'label': u'Tous les formulaires', + 'label': 'Tous les formulaires', 'fact_table': 'formdata', 'key': 'id', 'joins': [ @@ -267,7 +272,6 @@ class WcsOlapFeeder(object): self.connection = psycopg2.connect(dsn=pg_dsn) self.connection.autocommit = True self.cur = self.connection.cursor() - psycopg2.extensions.register_type(psycopg2.extensions.UNICODE, self.cur) try: self.has_jsonb = self.detect_jsonb() @@ -298,7 +302,7 @@ class WcsOlapFeeder(object): if len(table_name) < 64 and not force_hash: return table_name else: - return table_name[:63-hash_length] + hashlib.md5(table_name).hexdigest()[:hash_length] + return table_name[:63 - hash_length] + hashlib.md5(table_name.encode('utf-8')).hexdigest()[:hash_length] @property def default_ctx(self): @@ -444,30 +448,30 @@ CREATE TABLE public.dates AS (SELECT def do_base_table(self): # channels self.create_labeled_table('channel', [[c[0], c[2]] for c in self.channels], - comment=u'canal') + comment='canal') # roles roles = dict((i, role.name) for i, role in enumerate(self.roles)) - tmp_role_map = self.create_labeled_table('role', roles.items(), comment=u'role') + tmp_role_map = self.create_labeled_table('role', roles.items(), comment='role') self.role_mapping = dict( (role.id, tmp_role_map[role.name]) for role in self.roles) # categories tmp_cat_map = self.create_labeled_table( - 'category', enumerate(c.name for c in self.categories), comment=u'catégorie') + 'category', enumerate(c.name for c in self.categories), comment='catégorie') self.categories_mapping = dict((c.id, tmp_cat_map[c.name]) for c in self.categories) self.create_labeled_table('hour', zip(range(0, 24), map(str, range(0, 24))), - comment=u'heures') + comment='heures') self.create_labeled_table('status', self.status, - comment=u'statuts simplifiés') + comment='statuts simplifiés') self.ex('CREATE TABLE {form_table} (id serial PRIMARY KEY,' ' category_id integer REFERENCES {category_table} (id),' ' label varchar)') - self.ex('COMMENT ON TABLE {form_table} IS %s', vars=(u'types de formulaire',)) + self.ex('COMMENT ON TABLE {form_table} IS %s', vars=('types de formulaire',)) # agents - self.create_labeled_table_serial('agent', comment=u'agents') + self.create_labeled_table_serial('agent', comment='agents') self.columns = [ ['id', 'serial primary key'], @@ -484,19 +488,19 @@ CREATE TABLE public.dates AS (SELECT if self.has_jsonb: self.columns.append(['json_data', 'JSONB NULL']) self.comments = { - 'formdef_id': u'formulaire', - 'receipt_time': u'date de réception', - 'hour_id': u'heure', - 'channel_id': u'canal', - 'backoffice': u'soumission backoffce', - 'generic_status_id': u'statut simplifié', - 'endpoint_delay': u'délai de traitement', - 'geolocation_base': u'position géographique', + 'formdef_id': 'formulaire', + 'receipt_time': 'date de réception', + 'hour_id': 'heure', + 'channel_id': 'canal', + 'backoffice': 'soumission backoffce', + 'generic_status_id': 'statut simplifié', + 'endpoint_delay': 'délai de traitement', + 'geolocation_base': 'position géographique', } self.create_table('{generic_formdata_table}', self.columns) for at, comment in self.comments.iteritems(): self.ex('COMMENT ON COLUMN {generic_formdata_table}.%s IS %%s' % at, vars=(comment,)) - self.ex('COMMENT ON TABLE {generic_formdata_table} IS %s', vars=(u'tous les formulaires',)) + self.ex('COMMENT ON TABLE {generic_formdata_table} IS %s', vars=('tous les formulaires',)) # evolutions self.create_table('{generic_evolution_table}', [ ['id', 'serial primary key'], @@ -506,7 +510,7 @@ CREATE TABLE public.dates AS (SELECT ['date', 'date'], ['hour_id', 'smallint REFERENCES {hour_table} (id)'], ]) - self.ex('COMMENT ON TABLE {generic_evolution_table} IS %s', vars=(u'evolution générique',)) + self.ex('COMMENT ON TABLE {generic_evolution_table} IS %s', vars=('evolution générique',)) def feed(self): try: @@ -522,7 +526,7 @@ CREATE TABLE public.dates AS (SELECT formdef_feeder = WcsFormdefFeeder(self, formdef, do_feed=self.do_feed) formdef_feeder.feed() except WcsApiError as e: - self.logger.error(u'failed to retrieve formdef %s, %s', formdef.slug, e) + self.logger.error('failed to retrieve formdef %s, %s', formdef.slug, e) if 'cubes_model_dirs' in self.config: model_path = os.path.join(self.config['cubes_model_dirs'], '%s.model' % self.schema) with open(model_path, 'w') as f: @@ -597,7 +601,7 @@ class WcsFormdefFeeder(object): statuses = self.formdef.schema.workflow.statuses tmp_status_map = self.olap_feeder.create_labeled_table( self.status_table_name, enumerate([s.name for s in statuses]), - comment=u'statuts du formulaire « %s »' % self.formdef.schema.name) + comment='statuts du formulaire « %s »' % self.formdef.schema.name) self.status_mapping = dict((s.id, tmp_status_map[s.name]) for s in statuses) def do_data_table(self): @@ -626,7 +630,7 @@ class WcsFormdefFeeder(object): continue already_seen_varnames.add(field.varname) if field.type == 'item': - comment = (u'valeurs du champ « %s » du formulaire %s' + comment = ('valeurs du champ « %s » du formulaire %s' % (field.label, self.formdef.schema.name)) table_name = self.hash_table_name('%s_field_%s' % (self.table_name, field.varname)) # create table and mapping @@ -658,14 +662,14 @@ class WcsFormdefFeeder(object): for function, name in self.formdef.schema.workflow.functions.iteritems(): at = 'function_%s' % slugify(function) columns.append([at, 'smallint REFERENCES {role_table} (id)']) - comments[at] = u'fonction « %s »' % name + comments[at] = 'fonction « %s »' % name self.columns = ([name for name, _type in self.olap_feeder.columns] + [ name for name, _type in columns]) self.columns.remove('geolocation_base') self.create_table(self.table_name, columns, inherits='{generic_formdata_table}', - comment=u'formulaire %s' % self.formdef.schema.name) + comment='formulaire %s' % self.formdef.schema.name) for at, comment in comments.iteritems(): self.ex('COMMENT ON COLUMN {formdata_table}.%s IS %%s' % quote(at), vars=(comment,)) @@ -694,7 +698,7 @@ class WcsFormdefFeeder(object): ['hour_id', 'smallint REFERENCES {hour_table} (id)'], ]) self.ex('COMMENT ON TABLE {evolution_table} IS %s', - vars=(u'evolution des demandes %s' % self.formdef.schema.name,)) + vars=('evolution des demandes %s' % self.formdef.schema.name,)) def insert_item_value(self, field, value): table_name = self.hash_table_name('%s_field_%s' % (self.table_name, field.varname)) @@ -812,9 +816,7 @@ class WcsFormdefFeeder(object): at = 'function_%s' % slugify(function) row[at] = v - tpl = '(' + ', '.join(['%s'] * len(self.columns[1:])) + ')' - value = self.cur.mogrify(tpl, [row[column] for column in self.columns[1:]]) - values.append(value) + values.append(tuple(row[column] for column in self.columns[1:])) # inert evolutions generic_evolution = [] evolution = [] @@ -842,8 +844,14 @@ class WcsFormdefFeeder(object): if not values: self.logger.warning('no data') return + insert_columns = ['%s' % quote(column) for column in self.columns[1:]] + insert_columns = ', '.join(insert_columns) self.ex('INSERT INTO {formdata_table} ({columns}) VALUES {values} RETURNING id', - ctx=dict(columns=', '.join(['%s' % quote(column) for column in self.columns[1:]]), values=', '.join(values))) + ctx=dict( + columns=insert_columns, + values=', '.join(['%s'] * len(values)) + ), + vars=values) # insert generic evolutions generic_evolutions = [] @@ -930,7 +938,7 @@ class WcsFormdefFeeder(object): }) cube['dimensions'].append({ 'name': at, - 'label': u'fonction %s' % name.lower(), + 'label': 'fonction %s' % name.lower(), 'join': [at], 'type': 'integer', 'value': '%s.id' % quote(at),