general: add management command to import from tabellio database
This commit is contained in:
parent
6d8284d9dd
commit
5bed45419b
|
@ -96,6 +96,8 @@ STATICFILES_DIRS = (
|
|||
)
|
||||
|
||||
|
||||
TABELLIO_DB_SETTINGS = {}
|
||||
|
||||
local_settings_file = os.environ.get('PFWB_THESAURUS_SETTINGS_FILE',
|
||||
os.path.join(os.path.dirname(__file__), 'local_settings.py'))
|
||||
if os.path.exists(local_settings_file):
|
||||
|
|
|
@ -0,0 +1,114 @@
|
|||
# pfwb_thesaurus - thesaurus system
|
||||
# Copyright (C) 2016 Entr'ouvert
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify it
|
||||
# under the terms of the GNU Affero General Public License as published
|
||||
# by the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import psycopg2
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
|
||||
from ...models import Term, AlternativeTerm
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
def handle(self, *args, **kwargs):
|
||||
pgconn = psycopg2.connect(**settings.TABELLIO_DB_SETTINGS)
|
||||
cur = pgconn.cursor()
|
||||
|
||||
cur.execute('''SELECT t_thesaurus_term.id,
|
||||
t_thesaurus_term.term,
|
||||
ARRAY(
|
||||
SELECT term
|
||||
FROM t_thesaurus_term AS t4,
|
||||
t_thesaurus_rel AS t5
|
||||
WHERE t4.id = t5.primtermid
|
||||
AND t5.rel = 'EQ'
|
||||
AND t5.sectermid = t_thesaurus_term.id) AS eq,
|
||||
(SELECT t2.term FROM t_thesaurus_term AS t2,
|
||||
t_thesaurus_rel
|
||||
WHERE t_thesaurus_rel.primtermid = t_thesaurus_term.id
|
||||
AND t_thesaurus_rel.rel = 'HN'
|
||||
AND t2.id = t_thesaurus_rel.sectermid) AS hn,
|
||||
(SELECT t3.term FROM t_thesaurus_term AS t3,
|
||||
t_thesaurus_rel
|
||||
WHERE t_thesaurus_rel.primtermid = t_thesaurus_term.id
|
||||
AND t_thesaurus_rel.rel = 'SN'
|
||||
AND t3.id = t_thesaurus_rel.sectermid
|
||||
LIMIT 1) AS sn
|
||||
FROM t_thesaurus_term
|
||||
WHERE EXISTS (
|
||||
SELECT id
|
||||
FROM t_thesaurus_rel
|
||||
WHERE (sectermid = t_thesaurus_term.id
|
||||
OR primtermid = t_thesaurus_term.id)
|
||||
AND rel in ('BT', 'RT'))
|
||||
''')
|
||||
while True:
|
||||
row = cur.fetchone()
|
||||
if row is None:
|
||||
break
|
||||
term, created = Term.objects.get_or_create(tabellio_id=row[0])
|
||||
term.term = row[1]
|
||||
term.historical_note = row[3] or ''
|
||||
term.scope_note = row[4] or ''
|
||||
current_equivs = set([x.term for x in term.alternativeterm_set.all()])
|
||||
new_equivs = set(row[2])
|
||||
term.save()
|
||||
term.alternativeterm_set.filter(term__in=list(current_equivs - new_equivs)).delete()
|
||||
for alt_term in new_equivs - current_equivs:
|
||||
alt_term_object = AlternativeTerm(term=alt_term, primary_term=term)
|
||||
alt_term_object.save()
|
||||
|
||||
bt_rels = {}
|
||||
rt_rels = {}
|
||||
cur.execute('''SELECT primtermid, sectermid, rel
|
||||
FROM t_thesaurus_rel
|
||||
WHERE rel IN ('BT', 'RT')
|
||||
''')
|
||||
while True:
|
||||
row = cur.fetchone()
|
||||
if row is None:
|
||||
break
|
||||
primtermid, sectermid, rel = row
|
||||
if rel == 'BT':
|
||||
if not bt_rels.get(primtermid):
|
||||
bt_rels[primtermid] = []
|
||||
bt_rels[primtermid].append(sectermid)
|
||||
elif rel == 'RT':
|
||||
if not rt_rels.get(primtermid):
|
||||
rt_rels[primtermid] = []
|
||||
rt_rels[primtermid].append(sectermid)
|
||||
|
||||
for term, broader_term_ids in bt_rels.items():
|
||||
term = Term.objects.get(tabellio_id=term)
|
||||
broader_terms = Term.objects.filter(tabellio_id__in=broader_term_ids)
|
||||
current_set = set([x.id for x in term.broader.all()])
|
||||
new_set = set([x.id for x in broader_terms])
|
||||
for removed_term in current_set - new_set:
|
||||
term.broader.remove(removed_term)
|
||||
for added_term in new_set - current_set:
|
||||
term.broader.add(added_term)
|
||||
|
||||
for term, related_term_ids in rt_rels.items():
|
||||
term = Term.objects.get(tabellio_id=term)
|
||||
related_terms = Term.objects.filter(tabellio_id__in=related_term_ids)
|
||||
current_set = set([x.id for x in term.related.all()])
|
||||
new_set = set([x.id for x in related_terms])
|
||||
for removed_term in current_set - new_set:
|
||||
term.related.remove(removed_term)
|
||||
for added_term in new_set - current_set:
|
||||
term.related.add(added_term)
|
||||
|
||||
pgconn.close()
|
Reference in New Issue