From c1ccadde40c74c59088845041eacab2d221c8d61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20P=C3=A9ters?= Date: Wed, 20 Oct 2021 08:58:49 +0200 Subject: [PATCH] sql: switch fts normalization to unidecode (#57009) --- debian/control | 1 + setup.py | 1 + wcs/sql.py | 7 ++++--- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/debian/control b/debian/control index 737b2dc63..a698c3980 100644 --- a/debian/control +++ b/debian/control @@ -28,6 +28,7 @@ Depends: graphviz, python3-pyproj, python3-quixote, python3-requests, + python3-unidecode, python3-uwsgidecorators, python3-vobject, python3-xstatic-leaflet, diff --git a/setup.py b/setup.py index 8a18724b5..bdfc79fa2 100644 --- a/setup.py +++ b/setup.py @@ -174,6 +174,7 @@ setup( 'XStatic-Leaflet', 'XStatic-Leaflet-GestureHandling', 'pyproj', + 'unidecode', ], package_dir={'wcs': 'wcs'}, packages=find_packages(), diff --git a/wcs/sql.py b/wcs/sql.py index a1f7128fb..751c04265 100644 --- a/wcs/sql.py +++ b/wcs/sql.py @@ -20,12 +20,12 @@ import io import json import re import time -import unicodedata import uuid import psycopg2 import psycopg2.extensions import psycopg2.extras +import unidecode try: import cPickle as pickle @@ -284,7 +284,7 @@ class FtsMatch(Criteria): @classmethod def get_fts_value(cls, value): - return unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii') + return unidecode.unidecode(value) def as_sql(self): return 'fts @@ plainto_tsquery(%%(c%s)s)' % id(self.value) @@ -3450,7 +3450,7 @@ def get_period_total( # latest migration, number + description (description is not used # programmaticaly but will make sure git conflicts if two migrations are # separately added with the same number) -SQL_LEVEL = (54, 'add patch column on snapshot table') +SQL_LEVEL = (55, 'update full text normalisation (switch to unidecode)') def migrate_global_views(conn, cur): @@ -3587,6 +3587,7 @@ def migrate(): # 38: extract submission_agent_id to its own column # 41: update
full text normalization # 51: add index on formdata blockdef fields + # 55: update full text normalisation (switch to unidecode) set_reindex('formdata', 'needed', conn=conn, cur=cur) if sql_level < 46: from wcs.carddef import CardDef