sql: unify full text search normalization (#46015)

This commit is contained in:
Frédéric Péters 2020-08-24 10:19:57 +02:00
parent 7e392aeaa4
commit fe8c22618d
2 changed files with 13 additions and 4 deletions

View File

@@ -975,6 +975,7 @@ def test_sql_criteria_fts():
assert data_class.count() == 50
assert len(data_class.select()) == 50
assert set(data_class.get_ids_from_query('BAR')) == set(range(21, 51))
assert [x.id for x in data_class.select([st.FtsMatch('BAR')], order_by='id')] == list(range(21, 51))
# check fts against data in history
@@ -1012,6 +1013,11 @@ def test_sql_criteria_fts():
assert data_class.count([st.FtsMatch(user.name)]) == 1
assert data_class.count([st.FtsMatch('Frederic')]) == 1
# check looking up a display id
assert len(data_class.get_ids_from_query(formdata1.id_display)) == 1
assert len(data_class.select([st.FtsMatch(formdata1.id_display)])) == 1
assert data_class.select([st.FtsMatch(formdata1.id_display)])[0].id_display == formdata1.id_display
def table_exists(cur, table_name):
cur.execute('''SELECT COUNT(*) FROM information_schema.tables

View File

@@ -238,7 +238,11 @@ class ILike(Criteria):
class FtsMatch(Criteria):
def __init__(self, value):
self.value = qommon.misc.simplify(value, space=' ')
self.value = self.get_fts_value(value)
@classmethod
def get_fts_value(self, value):
return unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
def as_sql(self):
return 'fts @@ plainto_tsquery(%%(c%s)s)' % id(self.value)
@@ -1100,7 +1104,6 @@ class SqlMixin(object):
conn.commit()
cur.close()
@classmethod
@guard_postgres
def get_ids_from_query(cls, query):
@@ -1108,7 +1111,7 @@ class SqlMixin(object):
sql_statement = '''SELECT id FROM %s
WHERE fts @@ plainto_tsquery(%%(value)s)''' % cls._table_name
cur.execute(sql_statement, {'value': qommon.misc.simplify(query, ' ')})
cur.execute(sql_statement, {'value': FtsMatch.get_fts_value(query)})
all_ids = [x[0] for x in cur.fetchall()]
cur.close()
return all_ids
@@ -1694,7 +1697,7 @@ class SqlDataMixin(SqlMixin):
WHERE id = %%(id)s''' % self._table_name
cur.execute(sql_statement, {
'id': self.id,
'fts': unicodedata.normalize('NFKD', ' '.join(fts_strings)).encode('ascii', 'ignore').decode('ascii'),
'fts': FtsMatch.get_fts_value(' '.join(fts_strings)),
})
conn.commit()