# pfwb_thesaurus - thesaurus system # Copyright (C) 2016 Entr'ouvert # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License as published # by the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . import psycopg2 from django.conf import settings from django.core.management.base import BaseCommand, CommandError from ...models import Term, AlternativeTerm class Command(BaseCommand): def handle(self, *args, **kwargs): pgconn = psycopg2.connect(**settings.TABELLIO_DB_SETTINGS) cur = pgconn.cursor() cur.execute('''SELECT t_thesaurus_term.id, t_thesaurus_term.term, ARRAY( SELECT term FROM t_thesaurus_term AS t4, t_thesaurus_rel AS t5 WHERE t4.id = t5.primtermid AND t5.rel = 'EQ' AND t5.sectermid = t_thesaurus_term.id) AS eq, (SELECT t2.term FROM t_thesaurus_term AS t2, t_thesaurus_rel WHERE t_thesaurus_rel.primtermid = t_thesaurus_term.id AND t_thesaurus_rel.rel = 'HN' AND t2.id = t_thesaurus_rel.sectermid) AS hn, (SELECT t3.term FROM t_thesaurus_term AS t3, t_thesaurus_rel WHERE t_thesaurus_rel.primtermid = t_thesaurus_term.id AND t_thesaurus_rel.rel = 'SN' AND t3.id = t_thesaurus_rel.sectermid LIMIT 1) AS sn FROM t_thesaurus_term WHERE EXISTS ( SELECT id FROM t_thesaurus_rel WHERE (sectermid = t_thesaurus_term.id OR primtermid = t_thesaurus_term.id) AND rel in ('BT', 'RT')) ''') while True: row = cur.fetchone() if row is None: break term, created = Term.objects.get_or_create(tabellio_id=row[0]) term.term = row[1] term.historical_note = row[3] or '' term.scope_note = row[4] or '' current_equivs = set([x.term for x in term.alternativeterm_set.all()]) new_equivs = set(row[2]) term.save() term.alternativeterm_set.filter(term__in=list(current_equivs - new_equivs)).delete() for alt_term in new_equivs - current_equivs: alt_term_object = AlternativeTerm(term=alt_term, primary_term=term) alt_term_object.save() bt_rels = {} rt_rels = {} cur.execute('''SELECT primtermid, sectermid, rel FROM t_thesaurus_rel WHERE rel IN ('BT', 'RT') ''') while True: row = cur.fetchone() if row is None: break primtermid, sectermid, rel = row if rel == 'BT': if not bt_rels.get(primtermid): bt_rels[primtermid] = [] bt_rels[primtermid].append(sectermid) elif rel == 'RT': if not rt_rels.get(primtermid): rt_rels[primtermid] = [] rt_rels[primtermid].append(sectermid) for term, broader_term_ids in bt_rels.items(): term = Term.objects.get(tabellio_id=term) broader_terms = Term.objects.filter(tabellio_id__in=broader_term_ids) current_set = set([x.id for x in term.broader.all()]) new_set = set([x.id for x in broader_terms]) for removed_term in current_set - new_set: term.broader.remove(removed_term) for added_term in new_set - current_set: term.broader.add(added_term) for term, related_term_ids in rt_rels.items(): term = Term.objects.get(tabellio_id=term) related_terms = Term.objects.filter(tabellio_id__in=related_term_ids) current_set = set([x.id for x in term.related.all()]) new_set = set([x.id for x in related_terms]) for removed_term in current_set - new_set: term.related.remove(removed_term) for added_term in new_set - current_set: term.related.add(added_term) pgconn.close()