sql: implement new FTS for cards (#86903) #1436

Merged
fpeters merged 1 commits from wip/86903-fts-cards into main 2024-05-02 14:06:21 +02:00
4 changed files with 74 additions and 14 deletions

View File

@ -531,7 +531,10 @@ def test_backoffice_anonymise_no_actions(pub):
assert 'wf-actions' not in resp.forms
def test_backoffice_listing_fts(pub):
@pytest.mark.parametrize('wcs_fts', [True, False])
def test_backoffice_listing_fts(pub, wcs_fts):
pub.site_options.set('options', 'enable-new-fts', 'true' if wcs_fts else 'false')
create_superuser(pub)

Ajouter une requête où le résultat est différent selon l'activation ou pas ?

Ajouter une requête où le résultat est différent selon l'activation ou pas ?

J'avais ciblé ce test parce que pendant le développement de la fonctionnalité il cassait tant que les triggers ne marchaient pas, mais oui je vais ajouter un cas

J'avais ciblé ce test parce que pendant le développement de la fonctionnalité il cassait tant que les triggers ne marchaient pas, mais oui je vais ajouter un cas
create_environment(pub)
formdef = FormDef.get_by_urlname('form-title')

View File

@ -1121,7 +1121,11 @@ def test_sql_criteria_ilike(pub):
assert [x.id for x in data_class.select([st.ILike('f3', 'BAR')], order_by='id')] == list(range(21, 51))
def test_sql_criteria_fts(pub):
@pytest.mark.parametrize('wcs_fts', [True, False])
def test_sql_criteria_fts(pub, wcs_fts):
pub.load_site_options()
pub.site_options.set('options', 'enable-new-fts', 'true' if wcs_fts else 'false')
test_formdef = FormDef()
test_formdef.name = 'table select fts'
test_formdef.fields = [fields.StringField(id='3', label='string')]
@ -1184,6 +1188,19 @@ def test_sql_criteria_fts(pub):
assert len(data_class.select([st.FtsMatch(formdata1.id_display)])) == 1
assert data_class.select([st.FtsMatch(formdata1.id_display)])[0].id_display == formdata1.id_display
# check behaviour difference between old and new fts
data_class.wipe()
formdata = data_class()
formdata.data = {'3': 'dysfonctionnement'}
formdata.just_created()
formdata.store()
if wcs_fts:
for n in range(4, len(formdata.data['3'])):
assert data_class.count([st.FtsMatch(formdata.data['3'][:n])]) == 1
else:
assert data_class.count([st.FtsMatch(formdata.data['3'][:5])]) == 0
assert data_class.count([st.FtsMatch(formdata.data['3'])]) == 1
def test_search_tokens_purge(pub):
_, cur = sql.get_connection_and_cursor()

View File

@ -626,6 +626,8 @@ def do_formdef_tables(formdef, conn=None, cur=None, rebuild_views=False, rebuild
if formdef.data_sql_prefix == 'formdata':
recreate_trigger(formdef, cur, conn)
elif formdef.data_sql_prefix == 'carddata':
init_search_tokens_triggers_carddef(cur, formdef)
# migrations on _evolutions table
cur.execute(
@ -1817,6 +1819,31 @@ $function$;"""
cur.close()
def init_search_tokens_triggers_carddef(cur, carddef):
    """Create the search-token triggers on the data table of a card definition.

    Two row-level triggers are installed on the table named by
    ``carddef.data_class()._table_name``: one fired after INSERT and one fired
    after UPDATE OF fts. Both call ``wcs_search_tokens_trigger_fn()`` and only
    fire when the new ``fts`` value is not NULL.

    Each trigger is checked and created independently, so calling this
    function again is harmless and a table left with only one of the two
    triggers gets the missing one added.

    :param cur: database cursor used to run the statements.
    :param carddef: card definition whose data table receives the triggers.
    """
    if not _table_exists(cur, 'wcs_search_tokens'):
        # abort trigger creation if tokens table doesn't exist yet
        return

    table = carddef.data_class()._table_name
    # NOTE(review): the 40-character cut presumably keeps the generated
    # trigger names (prefix + 23-character suffix) within PostgreSQL's
    # identifier length limit -- confirm.
    trigger_prefix = table[:40]

    for suffix, event in (('ins', 'INSERT'), ('upd', 'UPDATE OF fts')):
        trigger_name = '%s__search_tokens_trg_%s' % (trigger_prefix, suffix)
        if _trigger_exists(cur, table, trigger_name):
            continue
        cur.execute(
            """CREATE TRIGGER %s
            AFTER %s ON %s
            FOR EACH ROW WHEN (NEW.fts IS NOT NULL)
            EXECUTE PROCEDURE wcs_search_tokens_trigger_fn();"""
            % (trigger_name, event, table)
        )
def init_search_tokens_triggers(cur):
# We define only appending triggers, ie on INSERT and UPDATE.
# It would be far heavier to maintain deletions here, and having extra data has
@ -1875,6 +1902,8 @@ $function$;"""
FOR EACH ROW WHEN (NEW.fts IS NOT NULL)
EXECUTE PROCEDURE wcs_search_tokens_trigger_fn();"""
)
for carddef in CardDef.select():
init_search_tokens_triggers_carddef(cur, carddef)
def init_search_tokens_data(cur):
@ -1894,6 +1923,14 @@ def init_search_tokens_data(cur):
SELECT unnest(tsvector_to_array(fts)) FROM searchable_formdefs
ON CONFLICT(token) DO NOTHING;"""
)
for carddef in CardDef.select():
data_class = carddef.data_class()
cur.execute(
"""INSERT INTO wcs_search_tokens
SELECT unnest(tsvector_to_array(fts)) FROM %s
ON CONFLICT(token) DO NOTHING;"""
% data_class._table_name
)
def purge_obsolete_search_tokens(cur=None):
@ -1902,11 +1939,14 @@ def purge_obsolete_search_tokens(cur=None):
own_cur = True
_, cur = get_connection_and_cursor()
cur.execute(
"""DELETE FROM wcs_search_tokens
query = """DELETE FROM wcs_search_tokens
WHERE token NOT IN (SELECT unnest(tsvector_to_array(fts)) FROM wcs_all_forms)
AND token NOT IN (SELECT unnest(tsvector_to_array(fts)) FROM searchable_formdefs);"""
)
AND token NOT IN (SELECT unnest(tsvector_to_array(fts)) FROM searchable_formdefs)
"""
for carddef in CardDef.select():
data_class = carddef.data_class()
query += 'AND token NOT IN (SELECT unnest(tsvector_to_array(fts)) FROM %s) ' % data_class._table_name
cur.execute(query)
if own_cur:
cur.close()
@ -5416,7 +5456,7 @@ def get_period_total(
# latest migration, number + description (description is not used
# programmatically but will make sure git conflicts if two migrations are
# separately added with the same number)
SQL_LEVEL = (109, 'add various indexes')
SQL_LEVEL = (110, 'add cards to the new FTS mechanism')
def migrate_global_views(conn, cur):
@ -5756,8 +5796,9 @@ def migrate():
for formdef in FormDef.select() + CardDef.select():
do_formdef_tables(formdef, rebuild_views=False, rebuild_global_views=False)
if sql_level < 108:
if sql_level < 110:
# 108: new fts mechanism with tokens table
# 110: add cards to fts mechanism
init_search_tokens()
if sql_level != SQL_LEVEL[0]:

View File

@ -19,6 +19,7 @@ import re
import time
import unidecode
from quixote import get_publisher
import wcs.qommon.storage
from wcs.qommon import misc
@ -376,12 +377,10 @@ class FtsMatch(Criteria):
return unidecode.unidecode(value)
def as_sql(self):
return 'fts @@ plainto_tsquery(%%(c%s)s)' % id(self.value)
class WcsFtsMatch(FtsMatch):

Ça m'irait bien d'avoir un feature flag pour contrôler ça, pour débuter, ça donnerait :

class FtsMatch(...):
    def as_sql(self):
         if get_publisher().has_site_option('enable-new-fts'):
              return 'fts @@ wcs_tsquery(%%(c%s)s)' % id(self.value)
         else:
              return 'fts @@ plainto_tsquery(%%(c%s)s)' % id(self.value)

(et comme ça on peut intégrer dès ce cycle, tester, et l'activer en vrai prochain cycle).

Ça m'irait bien d'avoir un feature flag pour contrôler ça, pour débuter, ça donnerait :

```
class FtsMatch(...):
    def as_sql(self):
        if get_publisher().has_site_option('enable-new-fts'):
            return 'fts @@ wcs_tsquery(%%(c%s)s)' % id(self.value)
        else:
            return 'fts @@ plainto_tsquery(%%(c%s)s)' % id(self.value)
```

(et comme ça on peut intégrer dès ce cycle, tester, et l'activer en vrai prochain cycle).

J'avais pas en tête les feature flags de wcs, je fais ça

J'avais pas en tête les feature flags de wcs, je fais ça
def as_sql(self):
return 'fts @@ wcs_tsquery(%%(c%s)s)' % id(self.value)
if get_publisher().has_site_option('enable-new-fts'):
return 'fts @@ wcs_tsquery(%%(c%s)s)' % id(self.value)
else:
return 'fts @@ plainto_tsquery(%%(c%s)s)' % id(self.value)
class ElementEqual(Criteria):