general: add management command to import from tabellio database

This commit is contained in:
Frédéric Péters 2016-05-12 15:09:47 +02:00
parent 6d8284d9dd
commit 5bed45419b
4 changed files with 116 additions and 0 deletions

View File

@ -96,6 +96,8 @@ STATICFILES_DIRS = (
)
TABELLIO_DB_SETTINGS = {}
local_settings_file = os.environ.get('PFWB_THESAURUS_SETTINGS_FILE',
os.path.join(os.path.dirname(__file__), 'local_settings.py'))
if os.path.exists(local_settings_file):

View File

@ -0,0 +1,114 @@
# pfwb_thesaurus - thesaurus system
# Copyright (C) 2016 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import psycopg2
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from ...models import Term, AlternativeTerm
class Command(BaseCommand):
def handle(self, *args, **kwargs):
pgconn = psycopg2.connect(**settings.TABELLIO_DB_SETTINGS)
cur = pgconn.cursor()
cur.execute('''SELECT t_thesaurus_term.id,
t_thesaurus_term.term,
ARRAY(
SELECT term
FROM t_thesaurus_term AS t4,
t_thesaurus_rel AS t5
WHERE t4.id = t5.primtermid
AND t5.rel = 'EQ'
AND t5.sectermid = t_thesaurus_term.id) AS eq,
(SELECT t2.term FROM t_thesaurus_term AS t2,
t_thesaurus_rel
WHERE t_thesaurus_rel.primtermid = t_thesaurus_term.id
AND t_thesaurus_rel.rel = 'HN'
AND t2.id = t_thesaurus_rel.sectermid) AS hn,
(SELECT t3.term FROM t_thesaurus_term AS t3,
t_thesaurus_rel
WHERE t_thesaurus_rel.primtermid = t_thesaurus_term.id
AND t_thesaurus_rel.rel = 'SN'
AND t3.id = t_thesaurus_rel.sectermid
LIMIT 1) AS sn
FROM t_thesaurus_term
WHERE EXISTS (
SELECT id
FROM t_thesaurus_rel
WHERE (sectermid = t_thesaurus_term.id
OR primtermid = t_thesaurus_term.id)
AND rel in ('BT', 'RT'))
''')
while True:
row = cur.fetchone()
if row is None:
break
term, created = Term.objects.get_or_create(tabellio_id=row[0])
term.term = row[1]
term.historical_note = row[3] or ''
term.scope_note = row[4] or ''
current_equivs = set([x.term for x in term.alternativeterm_set.all()])
new_equivs = set(row[2])
term.save()
term.alternativeterm_set.filter(term__in=list(current_equivs - new_equivs)).delete()
for alt_term in new_equivs - current_equivs:
alt_term_object = AlternativeTerm(term=alt_term, primary_term=term)
alt_term_object.save()
bt_rels = {}
rt_rels = {}
cur.execute('''SELECT primtermid, sectermid, rel
FROM t_thesaurus_rel
WHERE rel IN ('BT', 'RT')
''')
while True:
row = cur.fetchone()
if row is None:
break
primtermid, sectermid, rel = row
if rel == 'BT':
if not bt_rels.get(primtermid):
bt_rels[primtermid] = []
bt_rels[primtermid].append(sectermid)
elif rel == 'RT':
if not rt_rels.get(primtermid):
rt_rels[primtermid] = []
rt_rels[primtermid].append(sectermid)
for term, broader_term_ids in bt_rels.items():
term = Term.objects.get(tabellio_id=term)
broader_terms = Term.objects.filter(tabellio_id__in=broader_term_ids)
current_set = set([x.id for x in term.broader.all()])
new_set = set([x.id for x in broader_terms])
for removed_term in current_set - new_set:
term.broader.remove(removed_term)
for added_term in new_set - current_set:
term.broader.add(added_term)
for term, related_term_ids in rt_rels.items():
term = Term.objects.get(tabellio_id=term)
related_terms = Term.objects.filter(tabellio_id__in=related_term_ids)
current_set = set([x.id for x in term.related.all()])
new_set = set([x.id for x in related_terms])
for removed_term in current_set - new_set:
term.related.remove(removed_term)
for added_term in new_set - current_set:
term.related.add(added_term)
pgconn.close()