This repository has been archived on 2023-02-21. You can view files and clone it, but cannot push or open issues or pull requests.
themis.importexport/themis/importexport/sync.py

496 lines
22 KiB
Python

# -*- coding: utf-8 -*-
import datetime
import json
import time
import urllib2
import base64
import logging
from DateTime.DateTime import DateTime
import transaction
from zope.event import notify
from zope.lifecycleevent import ObjectAddedEvent, ObjectModifiedEvent
from z3c.relationfield import RelationValue
from plone.app.textfield.value import RichTextValue
from plone.namedfile.file import NamedBlobImage, NamedBlobFile
from themis.datatypes.address import Address
from utils import UtilityView
log = logging.getLogger('Plone')
# View that pulls deputies, commissions and documents from a remote Themis
# site as JSON over HTTP and creates, updates or removes the local copies.
class SyncFromThemis(UtilityView):
# Default base URL of the remote site; __call__ overrides it with the 'src'
# request form value when one is supplied.
src_url = 'http://localhost:8081/themis'
def urlopen(self, url):
    """Open ``url`` and return the response from ``urllib2.urlopen``.

    When the current request form carries a ``username``, an HTTP Basic
    ``Authorization`` header built from the ``username`` and ``password``
    form values is sent along; otherwise the URL is opened without
    credentials.
    """
    form = self.request.form
    username = form.get('username')
    if username:
        # Plone won't answer first with a 401/Authenticate challenge, so
        # urllib2's own HTTP auth handling never triggers; the header has
        # to be forced on the very first request.
        userpass = "%s:%s" % (username, form.get('password'))
        token = base64.b64encode(userpass).strip()
        headers = {'Authorization': 'Basic ' + token}
        return urllib2.urlopen(urllib2.Request(url, None, headers))
    return urllib2.urlopen(url)
def __call__(self):
    """Run the requested synchronisation steps and return a status string.

    Request form values read here:

    - ``src``: base URL of the remote site (defaults to ``self.src_url``);
    - ``timestamp``: passed unchanged to every ``sync_*`` method;
    - ``objects``: comma-separated step names, or ``all`` for every step.

    Steps run in a fixed order.  When the ``deleted_documents`` step runs
    its result is returned directly; otherwise ``'OK'`` is returned once
    all selected steps are done.
    """
    self.setup()
    self.src_url = self.request.form.get('src', self.src_url)

    def format_duration(v):
        # Render a number of seconds for the log line.  The bounds are
        # inclusive so that exactly one hour reads '1h00m00s' (not
        # '60m00s') and exactly one minute reads '01m00s' (not '60s').
        r = ''
        if v >= 3600:
            r = r + '%dh' % int(v // 3600)
            v = v % 3600
        if v >= 60 or r:
            r = r + '%02dm' % int(v // 60)
            v = v % 60
        r = r + '%02ds' % int(v)
        return r

    timestamp = self.request.form.get('timestamp')
    objects = self.request.form.get('objects', '').split(',')
    log.info('sync: start [%r] [%r]' % (timestamp, objects))

    steps = ('deputies', 'ministries', 'commissions', 'documents',
             'blocked_documents', 'deleted_documents')
    run_all = 'all' in objects
    for step in steps:
        if not run_all and step not in objects:
            continue
        t0 = time.time()
        result = getattr(self, 'sync_' + step)(timestamp)
        duration = time.time() - t0
        # when deleted docs sync called, return its result
        if step == 'deleted_documents':
            return result
        log.info('sync: %-14s: %6s [%s]' % (
            step, str(result)[:20], format_duration(duration)))
    return 'OK'
def create_relation_value_if_needed(self, current_value, intid):
    """Return a relation pointing at ``intid``, reusing ``current_value``.

    ``None`` is returned when ``intid`` is ``None``.  ``current_value`` is
    handed back untouched when it already targets ``intid``; in every
    other case a new ``RelationValue`` is built.
    """
    if intid is None:
        return None
    already_set = current_value is not None and current_value.to_id == intid
    return current_value if already_set else RelationValue(intid)
def create_relation_value_list_if_needed(self, current_value, intids):
    """Return a list of relations for ``intids``, reusing ``current_value``.

    ``None`` entries in ``intids`` (unresolved lookups) are dropped, since
    a relation cannot point at them.  If nothing remains, an empty list is
    returned.  ``current_value`` is handed back untouched when it already
    targets the same ids, regardless of order; otherwise a new list of
    ``RelationValue`` objects is built in the order of ``intids``.
    """
    wanted = [x for x in (intids or []) if x is not None]
    if not wanted:
        return []
    if current_value:
        # Sort both sides: callers pass ids in arbitrary order, and
        # comparing a sorted list against an unsorted one would rebuild
        # the relations on every run.
        current_intids = sorted(x.to_id for x in current_value)
        if current_intids == sorted(wanted):
            return current_value
    return [RelationValue(x) for x in wanted]
def sync_deputies(self, timestamp=None):
    """Create or refresh every deputy listed by the remote site.

    Reads ``<src_url>/@@listDeputies`` and then ``<deputy_url>/json`` for
    each entry; the local object lives in ``self.deputies_folder`` under
    the remote ``id``.  Each deputy is published after being updated.
    ``timestamp`` is accepted but not used yet (XXX: add timestamp).
    """
    # Fields copied verbatim from the remote JSON.
    plain_fields = ('firstname', 'lastname', 'active', 'sex', 'district',
                    'birthplace', 'bio', 'website', 'degrees', 'mandates',
                    'profession', 'seat_number')
    # Fields stored as rich text (HTML in, safe HTML out).
    html_fields = ('past_functions', 'current_functions', 'biography')
    address_fields = ('private_address', 'work_address', 'work_address_2')

    listing = json.load(self.urlopen('%s/@@listDeputies' % self.src_url))
    for deputy_url in listing:
        data = json.load(self.urlopen('%s/json' % deputy_url))
        deputy_id = data.get('id')
        if not hasattr(self.deputies_folder, deputy_id):
            self.deputies_folder.invokeFactory(
                'themis.datatypes.deputy', deputy_id,
                firstname=data.get('firstname'),
                lastname=data.get('lastname'))
        deputy = getattr(self.deputies_folder, deputy_id)

        for name in plain_fields:
            setattr(deputy, name, data.get(name))

        birthdate = data.get('birthdate')
        if birthdate:
            deputy.birthdate = datetime.datetime.strptime(
                birthdate, '%Y-%m-%d').date()
        else:
            deputy.birthdate = None

        deputy.polgroup = self.create_relation_value_if_needed(
            deputy.polgroup,
            self.get_polgroup_intid(data.get('polgroup')))

        picture_url = data.get('picture')
        if picture_url:
            # this will be an url
            content = self.urlopen(picture_url).read()
            # Only replace the stored image when it is missing or differs.
            if (not hasattr(deputy, 'picture')
                    or deputy.picture is None
                    or deputy.picture.data != content):
                deputy.picture = NamedBlobImage(
                    content, filename=data.get('id'))

        for name in html_fields:
            raw_html = data.get(name)
            if raw_html:
                value = RichTextValue(
                    raw=raw_html,
                    mimeType='text/html',
                    outputMimeType='text/x-html-safe')
            else:
                value = None
            setattr(deputy, name, value)

        for name in address_fields:
            raw_address = data.get(name)
            if raw_address:
                setattr(deputy, name, Address.from_dict(raw_address))
            else:
                setattr(deputy, name, None)

        notify(ObjectModifiedEvent(deputy))
        self.publish(deputy)
def sync_ministries(self, timestamp=None):
    """Placeholder step: nothing is synchronised and ``None`` is returned."""
    return None
def sync_commissions(self, timestamp=None):
    """Create or refresh every commission listed by the remote site.

    Reads ``<src_url>/@@listCommissions`` and then ``<url>/json`` for each
    entry; the local object lives in ``self.commissions_folder`` under the
    remote ``id``.  Each commission is published after being updated.
    ``timestamp`` is accepted but not used yet (XXX: add timestamp).
    """
    listing = json.load(
        self.urlopen('%s/@@listCommissions' % self.src_url))
    for commission_url in listing:
        data = json.load(self.urlopen('%s/json' % commission_url))
        commission_id = data.get('id')
        if not hasattr(self.commissions_folder, commission_id):
            self.commissions_folder.invokeFactory(
                'themis.datatypes.commission', commission_id,
                title=data.get('title'))
        commission = getattr(self.commissions_folder, commission_id)

        commission.active = data.get('active')
        commission.president = self.create_relation_value_if_needed(
            commission.president,
            self.get_author_intid('deputy:%s' % data.get('president')))

        # The three member lists are handled identically: remote deputy
        # ids are resolved to intids and turned into relation lists.
        for role in ('vicepresidents', 'members', 'substitutes'):
            current = getattr(commission, role)
            role_intids = [self.get_author_intid('deputy:%s' % x)
                           for x in data.get(role, [])]
            setattr(commission, role,
                    self.create_relation_value_list_if_needed(
                        current, role_intids))

        competences = data.get('competences')
        if competences:
            commission.competences = competences

        notify(ObjectModifiedEvent(commission))
        self.publish(commission)
def get_author_intid(self, author):
    """Return the intid of the deputy named by ``author``, or ``None``.

    ``author`` is a string of the form ``'<prefix>:<id>'``; the part after
    the first colon is looked up as an attribute of
    ``self.deputies_folder``.  A failed lookup is logged as a warning and
    yields ``None``.
    """
    try:
        deputy_id = author.split(':', 1)[1]
        deputy = getattr(self.deputies_folder, deputy_id)
    except AttributeError:
        log.warn('failed to get author %r' % author)
        return None
    return self.intids.getId(deputy)
# Create or update local documents from the remote site.
#
# Loads '<src_url>/@@listDocuments' (with '?timestamp=<timestamp>' appended
# when a timestamp is given), then '<document_url>/json' for every listed URL,
# and creates or updates an object in questions_folder, convocations_folder or
# documents_folder depending on the remote 'portal_type'.
#
# Returns the value of 'count', i.e. the enumerate() index of the last listed
# URL (0 when the list is empty).
# NOTE(review): that is one less than the number of listed documents -- confirm
# whether the caller (which only logs it) expects a count instead.
# NOTE(review): indentation is not visible in this listing; where a comment
# below depends on nesting it says so -- confirm against the repository.
def sync_documents(self, timestamp=None):
qs = ''
if timestamp:
qs = '?timestamp=%s' % timestamp
log.info('loading documents list from %s' % self.src_url)
documents = json.load(self.urlopen('%s/@@listDocuments%s' % (self.src_url, qs)))
log.info('loading documents list from %s -> DONE' % self.src_url)
count = 0
for count, document_url in enumerate(documents):
log.info('loading %s' % document_url)
try:
data = json.load(self.urlopen('%s/json' % document_url))
except urllib2.HTTPError as e:
# NOTE(review): this call goes through the 'logging' module itself (root
# logger) while the rest of the method uses the module-level 'log' logger.
logging.info('failed to load (%r)' % e)
continue
new_id = data.get('id')
is_question = False
# Pick the target folder from the remote portal_type and create the local
# object on first sight; 'object' is the local item in every branch.
if data.get('portal_type') in ('Question_ecriteD',
'questionactualiteD', 'QuestionoraleD',
'interpellationD'):
if not hasattr(self.questions_folder, new_id):
self.questions_folder.invokeFactory('tabellio.documents.question', new_id,
title = data.get('title'))
# NOTE(review): presumably set for every question, not only newly created
# ones -- the nesting cannot be told from this listing; confirm.
is_question = True
object = getattr(self.questions_folder, new_id)
elif data.get('portal_type') in ('convocation_commissionD',
'convocation_seanceD'):
if not hasattr(self.convocations_folder, new_id):
self.convocations_folder.invokeFactory('tabellio.documents.document', new_id,
title = data.get('title'))
object = getattr(self.convocations_folder, new_id)
else:
if not hasattr(self.documents_folder, new_id):
self.documents_folder.invokeFactory('tabellio.documents.document', new_id,
title = data.get('title'))
object = getattr(self.documents_folder, new_id)
if data.get('title'):
object.title = data.get('title')
if data.get('session'):
object.session = data.get('session')
# 'fichier' and 'document_imprime' are both URLs whose content is stored in
# object.file (as '<id>.pdf'); the file is only replaced when its bytes differ.
# When both keys are present the second block has the last word.
if data.get('fichier'):
content = self.urlopen(data.get('fichier')).read()
if not hasattr(object, 'file'):
object.file = None
if object.file is None or object.file.data != content:
object.file = NamedBlobFile(content, filename=data.get('id')+'.pdf')
if data.get('document_imprime'):
content = self.urlopen(data.get('document_imprime')).read()
if not hasattr(object, 'file'):
object.file = None
if object.file is None or object.file.data != content:
object.file = NamedBlobFile(content, filename=data.get('id')+'.pdf')
# The remote type label has its ' (D)' marker removed and is stored as
# questype for questions, doctype otherwise.
if data.get('object_type'):
if is_question:
object.questype = data.get('object_type').replace(' (D)', '')
else:
object.doctype = data.get('object_type').replace(' (D)', '')
if data.get('matieres'):
object.topics = data.get('matieres')
# Every date key that is present assigns object.date, so the last present
# key in this tuple is the one that sticks.
for attr in ('date_seance', 'date_seance_pleniere',
'date_de_la_commission', 'date_document_imprime',
'date_de_reception', 'date_reception',
'date_du_document', 'date_de_publication',
'date_du_rapport'):
if data.get(attr):
object.date = datetime.date.fromordinal(
datetime.datetime.strptime(
data.get(attr), '%Y-%m-%d').toordinal())
# initializing speakers list
speakers = []
# Merge all speaker keys into one list; going through set() removes
# duplicates and does not preserve order.
for attr in ('orateurs_seance', 'orateurs', 'orateurs_en_commission',
'orateurs_en_seanceprop', 'orateurs_rapportcom',
'orateurs_seance_reponse_orale'):
speakers = list(set(speakers + (data.get(attr) or [])))
if not hasattr(object, 'speakers'):
object.speakers = []
# get_author_intid returns None for authors it cannot resolve; those None
# entries are passed on unfiltered here (unlike the authors handling below).
speakers_intids = [self.get_author_intid(x) for x in speakers]
object.speakers = self.create_relation_value_list_if_needed(
object.speakers, speakers_intids)
if not hasattr(object, 'reporters'):
object.reporters = []
reporters_intids = [self.get_author_intid(x) for x in data.get('rapporteurs') or []]
object.reporters = self.create_relation_value_list_if_needed(
object.reporters, reporters_intids)
if data.get('date_sanction_promulgation'):
object.sanction_date = datetime.date.fromordinal(
datetime.datetime.strptime(
data.get('date_sanction_promulgation'), '%Y-%m-%d').toordinal())
else:
object.sanction_date = None
# Only the first element of 'reponse_a_question_ecrite' is kept.
if data.get('reponse_a_question_ecrite'):
object.written_answer = data.get('reponse_a_question_ecrite')[0]
else:
object.written_answer = None
if data.get('date_seance_ou_commission'):
object.seance_com_date = datetime.date.fromordinal(
datetime.datetime.strptime(
data.get('date_seance_ou_commission'),
'%Y-%m-%d').toordinal())
else:
object.seance_com_date = None
# As with the dates above, the last present number key wins.
for attr in ('numero_biq', 'numero_document', 'numero_bqr'):
if data.get(attr):
object.no = data.get(attr)
if data.get('numero_suite'):
object.nodoc = data.get('numero_suite')
# Collect the ids found under every "related documents" key of the remote
# JSON into one flat list.
related_doc_ids = []
for attr in ( 'autres_documents_lies_a_proposition',
'autres_documents_lies_au_projet',
'autres_documents_ou_dossiers_en_relation',
'bulletin_questions_ecrites',
'courriers_lies',
'cr_debats_declaration_politique_generale',
'cr_debats_declaration_programme',
'cr_reponse_orale',
'cr_seance',
'cr_seance_ou_biq',
'cr_seance_pleniere',
'cr_seance_prise_en_consideration',
'cr_seance_vote_motion',
'cr_vote_declaration_programme',
'docs_pointing',
'documents_contenus_dans_biq',
'documents_contenus_dans_bqr',
'documents_lies',
'documents_lies_budgetpfb',
'documents_lies_proposition_bt_rapporcom',
'document_s__li__s____ce_projet__bulletin_des_travaux__rapport_de_commission_',
'documents_r_f_ren_ant_ce_compte_rendu',
'documents_r_f_ren_ants',
'documents_r_f_renc_s',
'lien_qo_ou_qe',
'lien_vers_courrier_reponse',
'lien_vers_interpellation_ou_qe',
'question__ecrite_lien',
'reponses_ministre_lien'):
if not data.get(attr):
continue
related_doc_ids.extend(data.get(attr))
# Resolve each related id against the questions, documents and convocations
# folders (in that order); ids that are None or found nowhere are skipped.
if related_doc_ids:
related_docs = []
for related_doc_id in related_doc_ids:
if related_doc_id is None:
continue
if hasattr(self.questions_folder, related_doc_id):
related_doc = getattr(self.questions_folder, related_doc_id)
elif hasattr(self.documents_folder, related_doc_id):
related_doc = getattr(self.documents_folder, related_doc_id)
elif hasattr(self.convocations_folder, related_doc_id):
related_doc = getattr(self.convocations_folder, related_doc_id)
else:
continue
related_docs.append(self.intids.getId(related_doc))
object.related_docs = self.create_relation_value_list_if_needed(
object.related_docs, related_docs)
else:
object.related_docs = []
# Authors come either from 'auteur' (wrapped in a list when it is not one
# already) or from 'auteurs'.
authors_v = None
if data.get('auteur'):
authors_v = data.get('auteur')
if type(authors_v) is not list:
authors_v = [authors_v]
elif data.get('auteurs'):
authors_v = data.get('auteurs')
# An author list of exactly ['ministry:college'] flags the document as
# authored by the government; object.authors is left untouched in that branch.
if authors_v == ['ministry:college']:
object.author_is_government = True
elif authors_v:
authors_intids = [self.get_author_intid(x) for x in authors_v]
# Unresolvable authors are logged, then dropped from the list.
if None in authors_intids:
log.warn('unknown error in doc %s' % object.id)
authors_intids = [x for x in authors_intids if x is not None]
object.authors = self.create_relation_value_list_if_needed(
object.authors, authors_intids)
object.author_is_government = False
else:
object.authors = None
object.author_is_government = False
# The remote value u'Indéterminé' is treated the same as no value at all.
if data.get('etat_en_commission') and data.get('etat_en_commission') != u'Indéterminé':
object.commission_status = data.get('etat_en_commission')
else:
object.commission_status = None
if data.get('vote_seance') and data.get('vote_seance') != u'Indéterminé':
object.seance_vote = data.get('vote_seance')
else:
object.seance_vote = None
if data.get('portal_type') in ('PropositionD', 'ProjetD'):
if data.get('date_seance'):
object.seance_vote_date = datetime.date.fromordinal(
datetime.datetime.strptime(
data.get('date_seance'),
'%Y-%m-%d').toordinal())
else:
object.seance_vote_date = None
# NOTE(review): it cannot be told from this listing whether the 'Adoption'
# block below sits inside the portal_type check above or runs for every
# document -- confirm against the repository.
if data.get('vote_seance') == u'Adoption':
object.adopted = True
if data.get('date_approbation_tutelle'):
object.adoption_date = DateTime(data.get('date_approbation_tutelle'))
else:
object.adoption_date = None
if data.get('date_publication_moniteur_belge'):
object.moniteur_date = DateTime(data.get('date_publication_moniteur_belge'))
else:
object.moniteur_date = None
else:
object.adopted = False
object.adoption_date = None
object.moniteur_date = None
if is_question:
pass # XXX: import link to CRI
try:
notify(ObjectModifiedEvent(object))
except KeyError:
# ignore zc.relation errors that could happen in there.
pass
self.publish(object)
# Commit whenever the index is a multiple of 100 (including index 0).
if count % 100 == 0:
transaction.commit()
log.info('documents done')
return count
def sync_blocked_documents(self, timestamp=None):
    """Delete local copies of documents the remote site lists as blocked.

    Loads ``<src_url>/@@listBlockedDocuments`` (with
    ``?timestamp=<timestamp>`` appended when a timestamp is given), then
    ``<document_url>/json`` for every listed URL.  The remote
    ``portal_type`` selects the local folder (questions, convocations or
    documents), and the object with the remote ``id`` is deleted from it
    when present.  The transaction is committed whenever the loop index
    is a multiple of 100.  Returns ``None``.
    """
    question_types = ('Question_ecriteD', 'questionactualiteD',
                      'QuestionoraleD', 'interpellationD')
    convocation_types = ('convocation_commissionD', 'convocation_seanceD')

    qs = ''
    if timestamp:
        qs = '?timestamp=%s' % timestamp
    log.info('loading blocked documents list from %s' % self.src_url)
    documents = json.load(self.urlopen(
        '%s/@@listBlockedDocuments%s' % (self.src_url, qs)))
    log.info('loading blocked documents list from %s -> DONE' % self.src_url)

    for count, document_url in enumerate(documents):
        log.info('loading %s' % document_url)
        data = json.load(self.urlopen('%s/json' % document_url))
        new_id = data.get('id')
        portal_type = data.get('portal_type')
        if portal_type in question_types:
            folder = self.questions_folder
        elif portal_type in convocation_types:
            folder = self.convocations_folder
        else:
            folder = self.documents_folder
        # Delete only what exists locally.  The convocation case used to
        # test "not hasattr", which never removed existing convocations
        # and asked the folder to delete ids it did not contain.
        if hasattr(folder, new_id):
            folder.manage_delObjects([new_id])
        if count % 100 == 0:
            transaction.commit()
    log.info('blocked documents done')
def sync_deleted_documents(self, *args):
    """Report or purge local documents that no longer exist remotely.

    The last path segment of every URL in ``<src_url>/@@listDocuments`` is
    taken as a remote id; ids present in ``self.documents_folder`` but not
    in that list are the purge candidates.  When the request form has
    ``delete=yes`` they are deleted and ``'OK'`` is returned; otherwise
    nothing is deleted and the candidates are returned as a JSON list.
    Positional arguments are accepted and ignored.
    """
    listing = json.load(self.urlopen('%s/@@listDocuments' % self.src_url))
    remote_document_ids = [url.split('/')[-1] for url in listing]
    local_document_ids = [doc_id for doc_id in self.documents_folder]
    documents_to_purge = set(local_document_ids) - set(remote_document_ids)

    # Dry run unless deletion was explicitly requested.
    if self.request.form.get('delete') != 'yes':
        return json.dumps(list(documents_to_purge))

    log.info('deleted documents sync')
    for doc_id in documents_to_purge:
        self.documents_folder.manage_delObjects([doc_id])
    log.info('deleted documents sync done')
    return 'OK'