wcs_search_tokens: first part, create and maintain table (#86527)

This commit is contained in:
Pierre Ducroquet 2024-02-28 10:48:39 +01:00
parent 499adec1bb
commit d48bd8d392
1 changed file with 64 additions and 1 deletion

View File

@ -1579,6 +1579,8 @@ def do_global_views(conn, cur):
% (name, category.id)
)
init_search_tokens_triggers(cur)
def clean_global_views(conn, cur):
# Purge of any dead data
@ -1671,11 +1673,68 @@ def init_global_table(conn=None, cur=None):
endpoint_status=endpoint_status_filter,
)
)
init_search_tokens_data(cur)
if own_conn:
cur.close()
def init_search_tokens(conn=None, cur=None):
    """Create and populate the wcs_search_tokens table, idempotently.

    When no cursor is given, one is obtained from the project connection
    helper and closed before returning.  If the table already exists the
    function is a no-op; otherwise it creates the table, installs the
    maintenance triggers on wcs_all_forms, backfills existing tokens and
    finally adds the trigram index (indexing after the bulk insert is a
    small, free performance trick).
    """
    own_cur = False
    if not cur:
        own_cur = True
        conn, cur = get_connection_and_cursor()

    # Probe the catalog for an existing table; a single row means it is there.
    cur.execute("SELECT 1 FROM pg_class WHERE relname = 'wcs_search_tokens';")
    if len(cur.fetchall()) == 1:
        # Table already exists, nothing to do.
        if own_cur:
            cur.close()
        return

    # Create the tokens table.
    cur.execute("CREATE TABLE wcs_search_tokens(token TEXT PRIMARY KEY);")
    # Keep it up to date via triggers on wcs_all_forms.
    init_search_tokens_triggers(cur)
    # Backfill from the current contents of wcs_all_forms.
    init_search_tokens_data(cur)
    # Index last, after the bulk load.
    cur.execute("CREATE INDEX IF NOT EXISTS wcs_search_tokens_trgm ON wcs_search_tokens USING gin(token gin_trgm_ops);")

    if own_cur:
        cur.close()
def init_search_tokens_triggers(cur):
    """Install the triggers that keep wcs_search_tokens fed from wcs_all_forms.

    Only appending triggers (INSERT and UPDATE of fts) are defined.  It
    would be far heavier to maintain deletions here, and extra rows have
    no or marginal side effect on search performance and absolutely no
    impact on search results.  Instead, a weekly cron job deletes obsolete
    entries, making sure no personal data is kept uselessly.
    """
    # Shared appending function: explode the row's tsvector into tokens.
    cur.execute("""CREATE OR REPLACE FUNCTION wcs_search_tokens_trigger_fn ()
RETURNS trigger
LANGUAGE plpgsql
AS $function$
BEGIN
INSERT INTO wcs_search_tokens SELECT unnest(tsvector_to_array(NEW.fts)) ON CONFLICT(token) DO NOTHING;
RETURN NEW;
END;
$function$;""")

    # Hook the function on both row insertion and fts updates.
    for statement in (
        "CREATE OR REPLACE TRIGGER wcs_all_forms_fts_trg_ins AFTER INSERT ON wcs_all_forms FOR EACH ROW WHEN (NEW.fts IS NOT NULL) EXECUTE PROCEDURE wcs_search_tokens_trigger_fn();",
        "CREATE OR REPLACE TRIGGER wcs_all_forms_fts_trg_upd AFTER UPDATE OF fts ON wcs_all_forms FOR EACH ROW WHEN (NEW.fts IS NOT NULL) EXECUTE PROCEDURE wcs_search_tokens_trigger_fn();",
    ):
        cur.execute(statement)
def init_search_tokens_data(cur):
    """Seed wcs_search_tokens with every token currently in wcs_all_forms."""
    backfill_query = (
        "INSERT INTO wcs_search_tokens"
        " SELECT unnest(tsvector_to_array(fts)) FROM wcs_all_forms"
        " ON CONFLICT(token) DO NOTHING;"
    )
    cur.execute(backfill_query)
class SqlMixin:
_table_name = None
_numerical_id = True
@ -5100,7 +5159,7 @@ def get_period_total(
# latest migration, number + description (description is not used
# programmaticaly but will make sure git conflicts if two migrations are
# separately added with the same number)
SQL_LEVEL = (105, 'change test result json structure')
SQL_LEVEL = (106, 'improved fts method')
def migrate_global_views(conn, cur):
@ -5433,6 +5492,10 @@ def migrate():
for formdef in FormDef.select() + CardDef.select():
do_formdef_tables(formdef, rebuild_views=False, rebuild_global_views=False)
if sql_level < 106:
# 106: new fts mechanism with tokens table
init_search_tokens()
if sql_level != SQL_LEVEL[0]:
cur.execute(
'''UPDATE wcs_meta SET value = %s, updated_at=NOW() WHERE key = %s''',