# -*- coding: utf-8 -*-
|
|
|
|
import datetime
|
|
import json
|
|
import time
|
|
import urllib2
|
|
import base64
|
|
import logging
|
|
|
|
from DateTime.DateTime import DateTime
|
|
|
|
import transaction
|
|
from zope.event import notify
|
|
from zope.lifecycleevent import ObjectAddedEvent, ObjectModifiedEvent
|
|
from z3c.relationfield import RelationValue
|
|
from plone.app.textfield.value import RichTextValue
|
|
|
|
from plone.namedfile.file import NamedBlobImage, NamedBlobFile
|
|
|
|
from themis.datatypes.address import Address
|
|
|
|
from utils import UtilityView
|
|
|
|
# Module-level logger; deliberately uses the site-wide 'Plone' logger name
# (not __name__) so messages land in the main instance log.
log = logging.getLogger('Plone')
|
|
|
|
class SyncFromThemis(UtilityView):
    """Browser view that mirrors content from a remote Themis site.

    Pulls deputies, commissions and documents from the source site's JSON
    views and creates/updates/deletes the corresponding local objects.
    """

    # Default URL of the source Themis site; can be overridden per request
    # with the 'src' form parameter (see __call__).
    src_url = 'http://localhost:8081/themis'
|
|
def urlopen(self, url):
    """Open *url*, adding HTTP Basic credentials when the request form
    carries a username (and optional password).
    """
    username = self.request.form.get('username')
    if not username:
        return urllib2.urlopen(url)
    password = self.request.form.get('password')
    # Plone never sends an initial 401/WWW-Authenticate challenge, so
    # urllib2's built-in auth handlers never kick in; the Authorization
    # header has to be forced onto the request up front.
    token = base64.b64encode("%s:%s" % (username, password)).strip()
    headers = {'Authorization': 'Basic ' + token}
    return urllib2.urlopen(urllib2.Request(url, None, headers))
|
|
|
|
def __call__(self):
    """Entry point: run the sync passes selected by the request.

    Request form parameters:
      src       -- override the source Themis URL (default: self.src_url)
      timestamp -- only sync items modified since this timestamp
      objects   -- comma-separated pass names, or 'all'
      username/password -- HTTP Basic credentials for the source site

    Returns 'OK', except when the deleted_documents pass runs: that pass
    returns its own report and this method passes it straight through.
    """
    self.setup()
    self.src_url = self.request.form.get('src', self.src_url)

    def format_duration(v):
        # Render a duration in seconds as [Hh][MMm]SSs.
        # Uses >= (not >) so exact boundaries render correctly:
        # 3600s is '1h00m00s' rather than '60m00s', 60s is '01m00s'.
        r = ''
        if v >= 3600:
            r += '%dh' % int(v / 3600)
            v %= 3600
        if v >= 60 or r:
            r += '%02dm' % int(v / 60)
            v %= 60
        return r + '%02ds' % int(v)

    timestamp = self.request.form.get('timestamp')
    objects = self.request.form.get('objects', '').split(',')
    log.info('sync: start [%r] [%r]' % (timestamp, objects))
    # 'name' instead of 'object': avoid shadowing the builtin.
    for name in ('deputies', 'ministries', 'commissions', 'documents',
                 'blocked_documents', 'deleted_documents'):
        if 'all' not in objects and name not in objects:
            continue
        t0 = time.time()
        result = getattr(self, 'sync_' + name)(timestamp)
        duration = time.time() - t0

        # when deleted docs sync called, return its result
        if name in ('deleted_documents',):
            return result
        log.info('sync: %-14s: %6s [%s]' % (
                name, str(result)[:20], format_duration(duration)))

    return 'OK'
|
|
|
|
def create_relation_value_if_needed(self, current_value, intid):
    """Return a relation targeting *intid*, reusing *current_value* when
    it already points there.

    Returns None when intid is None; otherwise the existing RelationValue
    if its target is unchanged, else a freshly built one.
    """
    if intid is None:
        return None
    unchanged = current_value is not None and current_value.to_id == intid
    if unchanged:
        return current_value
    return RelationValue(intid)
|
|
|
|
def create_relation_value_list_if_needed(self, current_value, intids):
    """Return a list of relations targeting *intids*, reusing
    *current_value* when it already points at exactly that set of ids.

    Returns [] when intids is empty.  The comparison is order-insensitive:
    both sides are sorted, so receiving the same ids in a different order
    does not rebuild the RelationValue objects.
    """
    if not intids:
        return []
    if current_value:
        current_intids = sorted(x.to_id for x in current_value)
    else:
        current_intids = []
    # BUG FIX: the original compared the *sorted* current ids against the
    # *unsorted* wanted ids, so callers passing an equal set in a different
    # order (e.g. ids collected via set()) needlessly recreated every
    # relation.  Sort both sides before comparing.
    if current_value is None or current_intids != sorted(intids):
        return [RelationValue(x) for x in intids]
    return current_value
|
|
|
|
def sync_deputies(self, timestamp=None):
    """Mirror every deputy published by the source site.

    Fetches the deputy listing, then each deputy's JSON record; creates a
    themis.datatypes.deputy object when missing and copies the fields
    over, then fires ObjectModifiedEvent and republishes.
    *timestamp* is currently ignored (full sync every time).
    """

    def as_richtext(raw):
        # Wrap an HTML fragment as a RichTextValue, or None when empty.
        if not raw:
            return None
        return RichTextValue(raw=raw, mimeType='text/html',
                             outputMimeType='text/x-html-safe')

    deputies = json.load(self.urlopen('%s/@@listDeputies' % self.src_url))  # XXX: add timestamp

    for deputy_url in deputies:
        data = json.load(self.urlopen('%s/json' % deputy_url))

        new_id = data.get('id')
        if not hasattr(self.deputies_folder, new_id):
            self.deputies_folder.invokeFactory('themis.datatypes.deputy', new_id,
                    firstname=data.get('firstname'),
                    lastname=data.get('lastname'))
        # 'deputy' instead of 'object': avoid shadowing the builtin.
        deputy = getattr(self.deputies_folder, new_id)
        # Plain attributes copied verbatim from the JSON record (missing
        # keys become None, matching the original per-attribute gets).
        for attr in ('firstname', 'lastname', 'active', 'sex', 'district',
                     'birthplace', 'bio', 'website', 'degrees', 'mandates',
                     'profession', 'seat_number'):
            setattr(deputy, attr, data.get(attr))
        if data.get('birthdate'):
            # strptime(...).date() is the direct equivalent of the old
            # date.fromordinal(strptime(...).toordinal()) round-trip.
            deputy.birthdate = datetime.datetime.strptime(
                    data.get('birthdate'), '%Y-%m-%d').date()
        else:
            deputy.birthdate = None
        deputy.polgroup = self.create_relation_value_if_needed(deputy.polgroup,
                self.get_polgroup_intid(data.get('polgroup')))
        if data.get('picture'):
            # this will be an url; only store a new blob when the bytes
            # actually changed, to avoid needless database writes.
            content = self.urlopen(data.get('picture')).read()
            if not hasattr(deputy, 'picture'):
                deputy.picture = None
            if deputy.picture is None or deputy.picture.data != content:
                deputy.picture = NamedBlobImage(content, filename=data.get('id'))
        deputy.past_functions = as_richtext(data.get('past_functions'))
        deputy.current_functions = as_richtext(data.get('current_functions'))
        deputy.biography = as_richtext(data.get('biography'))
        for address in ('private_address', 'work_address', 'work_address_2'):
            if not data.get(address):
                setattr(deputy, address, None)
                continue
            setattr(deputy, address, Address.from_dict(data.get(address)))
        notify(ObjectModifiedEvent(deputy))
        self.publish(deputy)
|
|
|
|
|
|
def sync_ministries(self, timestamp=None):
    """Placeholder: ministry syncing is not implemented.

    Kept so __call__ can dispatch uniformly to sync_<name> for every
    object type it knows about.
    """
    pass
|
|
|
|
def sync_commissions(self, timestamp=None):
    """Mirror every commission published by the source site.

    Creates missing themis.datatypes.commission objects, refreshes their
    member relations, then fires ObjectModifiedEvent and republishes.
    *timestamp* is currently ignored.
    """
    listing_url = '%s/@@listCommissions' % self.src_url  # XXX: add timestamp
    for commission_url in json.load(self.urlopen(listing_url)):
        data = json.load(self.urlopen('%s/json' % commission_url))

        new_id = data.get('id')
        if not hasattr(self.commissions_folder, new_id):
            self.commissions_folder.invokeFactory(
                    'themis.datatypes.commission', new_id,
                    title=data.get('title'))
        commission = getattr(self.commissions_folder, new_id)
        commission.active = data.get('active')
        # The president is a single relation; the other three rosters are
        # relation lists.  All targets are 'deputy:<id>' references.
        commission.president = self.create_relation_value_if_needed(
                commission.president,
                self.get_author_intid('deputy:%s' % data.get('president')))
        for roster in ('vicepresidents', 'members', 'substitutes'):
            intids = [self.get_author_intid('deputy:%s' % x)
                      for x in data.get(roster, [])]
            setattr(commission, roster,
                    self.create_relation_value_list_if_needed(
                            getattr(commission, roster), intids))
        # NOTE(review): competences is only ever set, never cleared, so a
        # value removed upstream sticks locally — behavior preserved as-is.
        if data.get('competences'):
            commission.competences = data.get('competences')
        notify(ObjectModifiedEvent(commission))
        self.publish(commission)
|
|
|
|
def get_author_intid(self, author):
|
|
try:
|
|
obj = getattr(self.deputies_folder, author.split(':', 1)[1])
|
|
except AttributeError:
|
|
log.warn('failed to get author %r' % author)
|
|
return None
|
|
obj_intid = self.intids.getId(obj)
|
|
return obj_intid
|
|
|
|
def sync_documents(self, timestamp=None):
    """Mirror documents, questions and convocations from the source site.

    Fetches the (optionally timestamp-filtered) document listing, then
    each document's JSON record; routes it by portal_type into the
    questions, convocations or documents folder, copies its fields and
    relations over, and republishes it.  Commits every 100 documents.
    Returns the enumerate() index of the last document processed.
    NOTE(review): when the listing is empty, 'count' is never bound and
    the final 'return count' would raise — presumably never happens in
    practice; confirm before relying on the return value.
    """
    qs = ''
    if timestamp:
        qs = '?timestamp=%s' % timestamp
    log.info('loading documents list from %s' % self.src_url)
    documents = json.load(self.urlopen('%s/@@listDocuments%s' % (self.src_url, qs)))
    log.info('loading documents list from %s -> DONE' % self.src_url)

    for count, document_url in enumerate(documents):
        log.info('loading %s' % document_url)
        try:
            data = json.load(self.urlopen('%s/json' % document_url))
        except urllib2.HTTPError as e:
            # Skip documents that fail to download.
            # NOTE(review): this logs through the root 'logging' module,
            # not the module-level 'log' logger used everywhere else.
            logging.info('failed to load (%r)' % e)
            continue

        new_id = data.get('id')
        is_question = False
        # Route the object to a local folder according to its source type.
        if data.get('portal_type') in ('Question_ecriteD',
                'questionactualiteD', 'QuestionoraleD',
                'interpellationD'):
            if not hasattr(self.questions_folder, new_id):
                self.questions_folder.invokeFactory('tabellio.documents.question', new_id,
                        title = data.get('title'))
            is_question = True
            object = getattr(self.questions_folder, new_id)
        elif data.get('portal_type') in ('convocation_commissionD',
                'convocation_seanceD'):
            if not hasattr(self.convocations_folder, new_id):
                self.convocations_folder.invokeFactory('tabellio.documents.document', new_id,
                        title = data.get('title'))
            object = getattr(self.convocations_folder, new_id)
        else:
            if not hasattr(self.documents_folder, new_id):
                self.documents_folder.invokeFactory('tabellio.documents.document', new_id,
                        title = data.get('title'))
            object = getattr(self.documents_folder, new_id)
        if data.get('title'):
            object.title = data.get('title')
        if data.get('session'):
            object.session = data.get('session')
        if data.get('fichier'):
            # Download the PDF; only replace the stored blob when the
            # bytes actually changed.
            content = self.urlopen(data.get('fichier')).read()
            if not hasattr(object, 'file'):
                object.file = None
            if object.file is None or object.file.data != content:
                object.file = NamedBlobFile(content, filename=data.get('id')+'.pdf')
        if data.get('document_imprime'):
            # NOTE(review): this writes the same 'file' attribute as the
            # 'fichier' branch above, so when both keys are present the
            # printed version wins — confirm that is intended.
            content = self.urlopen(data.get('document_imprime')).read()
            if not hasattr(object, 'file'):
                object.file = None
            if object.file is None or object.file.data != content:
                object.file = NamedBlobFile(content, filename=data.get('id')+'.pdf')
        if data.get('object_type'):
            # Strip the ' (D)' suffix the source adds to type labels.
            if is_question:
                object.questype = data.get('object_type').replace(' (D)', '')
            else:
                object.doctype = data.get('object_type').replace(' (D)', '')
        if data.get('matieres'):
            object.topics = data.get('matieres')

        # All these date fields map onto the single 'date' attribute: the
        # last one present in the record wins.
        for attr in ('date_seance', 'date_seance_pleniere',
                'date_de_la_commission', 'date_document_imprime',
                'date_de_reception', 'date_reception',
                'date_du_document', 'date_de_publication',
                'date_du_rapport'):
            if data.get(attr):
                object.date = datetime.date.fromordinal(
                        datetime.datetime.strptime(
                            data.get(attr), '%Y-%m-%d').toordinal())

        # initializing speakers list
        speakers = []

        # Union of every speaker field; set() deduplicates, so the
        # resulting order is arbitrary.
        for attr in ('orateurs_seance', 'orateurs', 'orateurs_en_commission',
                'orateurs_en_seanceprop', 'orateurs_rapportcom',
                'orateurs_seance_reponse_orale'):
            speakers = list(set(speakers + (data.get(attr) or [])))

        if not hasattr(object, 'speakers'):
            object.speakers = []

        speakers_intids = [self.get_author_intid(x) for x in speakers]
        object.speakers = self.create_relation_value_list_if_needed(
                object.speakers, speakers_intids)

        if not hasattr(object, 'reporters'):
            object.reporters = []
        reporters_intids = [self.get_author_intid(x) for x in data.get('rapporteurs') or []]
        object.reporters = self.create_relation_value_list_if_needed(
                object.reporters, reporters_intids)

        if data.get('date_sanction_promulgation'):
            object.sanction_date = datetime.date.fromordinal(
                    datetime.datetime.strptime(
                        data.get('date_sanction_promulgation'), '%Y-%m-%d').toordinal())
        else:
            object.sanction_date = None

        if data.get('reponse_a_question_ecrite'):
            # Only the first linked written answer is kept.
            object.written_answer = data.get('reponse_a_question_ecrite')[0]
        else:
            object.written_answer = None

        if data.get('date_seance_ou_commission'):
            object.seance_com_date = datetime.date.fromordinal(
                    datetime.datetime.strptime(
                        data.get('date_seance_ou_commission'),
                        '%Y-%m-%d').toordinal())
        else:
            object.seance_com_date = None

        # These all map onto the single 'no' attribute: last present wins.
        for attr in ('numero_biq', 'numero_document', 'numero_bqr'):
            if data.get(attr):
                object.no = data.get(attr)

        if data.get('numero_suite'):
            object.nodoc = data.get('numero_suite')

        # Gather ids from every kind of related-document reference the
        # source exposes (field names come straight from Tabellio).
        related_doc_ids = []
        for attr in ( 'autres_documents_lies_a_proposition',
                'autres_documents_lies_au_projet',
                'autres_documents_ou_dossiers_en_relation',
                'bulletin_questions_ecrites',
                'courriers_lies',
                'cr_debats_declaration_politique_generale',
                'cr_debats_declaration_programme',
                'cr_reponse_orale',
                'cr_seance',
                'cr_seance_ou_biq',
                'cr_seance_pleniere',
                'cr_seance_prise_en_consideration',
                'cr_seance_vote_motion',
                'cr_vote_declaration_programme',
                'docs_pointing',
                'documents_contenus_dans_biq',
                'documents_contenus_dans_bqr',
                'documents_lies',
                'documents_lies_budgetpfb',
                'documents_lies_proposition_bt_rapporcom',
                'document_s__li__s____ce_projet__bulletin_des_travaux__rapport_de_commission_',
                'documents_r_f_ren_ant_ce_compte_rendu',
                'documents_r_f_ren_ants',
                'documents_r_f_renc_s',
                'lien_qo_ou_qe',
                'lien_vers_courrier_reponse',
                'lien_vers_interpellation_ou_qe',
                'question__ecrite_lien',
                'reponses_ministre_lien'):
            if not data.get(attr):
                continue
            related_doc_ids.extend(data.get(attr))
        if related_doc_ids:
            # Resolve each id in whichever folder holds it; ids that exist
            # in none of the folders are silently skipped.
            related_docs = []
            for related_doc_id in related_doc_ids:
                if related_doc_id is None:
                    continue
                if hasattr(self.questions_folder, related_doc_id):
                    related_doc = getattr(self.questions_folder, related_doc_id)
                elif hasattr(self.documents_folder, related_doc_id):
                    related_doc = getattr(self.documents_folder, related_doc_id)
                elif hasattr(self.convocations_folder, related_doc_id):
                    related_doc = getattr(self.convocations_folder, related_doc_id)
                else:
                    continue
                related_docs.append(self.intids.getId(related_doc))
            object.related_docs = self.create_relation_value_list_if_needed(
                    object.related_docs, related_docs)
        else:
            object.related_docs = []

        # 'auteur' (possibly a scalar, normalized to a list) takes
        # precedence over 'auteurs'.
        authors_v = None
        if data.get('auteur'):
            authors_v = data.get('auteur')
            if type(authors_v) is not list:
                authors_v = [authors_v]
        elif data.get('auteurs'):
            authors_v = data.get('auteurs')
        if authors_v == ['ministry:college']:
            object.author_is_government = True
        elif authors_v:
            authors_intids = [self.get_author_intid(x) for x in authors_v]
            if None in authors_intids:
                # At least one author reference did not resolve.
                log.warn('unknown error in doc %s' % object.id)
            object.authors = self.create_relation_value_list_if_needed(
                    object.authors, authors_intids)
            object.author_is_government = False
        else:
            object.authors = None
            object.author_is_government = False

        # u'Indéterminé' is the source's "unknown" marker; treat as unset.
        if data.get('etat_en_commission') and data.get('etat_en_commission') != u'Indéterminé':
            object.commission_status = data.get('etat_en_commission')
        else:
            object.commission_status = None

        if data.get('vote_seance') and data.get('vote_seance') != u'Indéterminé':
            object.seance_vote = data.get('vote_seance')
        else:
            object.seance_vote = None

        # Adoption bookkeeping only applies to propositions and projects.
        if data.get('portal_type') in ('PropositionD', 'ProjetD'):
            if data.get('date_seance'):
                object.seance_vote_date = datetime.date.fromordinal(
                        datetime.datetime.strptime(
                            data.get('date_seance'),
                            '%Y-%m-%d').toordinal())
            else:
                object.seance_vote_date = None
            if data.get('vote_seance') == u'Adoption':
                object.adopted = True
                if data.get('date_approbation_tutelle'):
                    object.adoption_date = DateTime(data.get('date_approbation_tutelle'))
                else:
                    object.adoption_date = None
                if data.get('date_publication_moniteur_belge'):
                    object.moniteur_date = DateTime(data.get('date_publication_moniteur_belge'))
                else:
                    object.moniteur_date = None
            else:
                object.adopted = False
                object.adoption_date = None
                object.moniteur_date = None

        if is_question:
            pass # XXX: import link to CRI

        try:
            notify(ObjectModifiedEvent(object))
        except KeyError:
            # ignore zc.relation errors that could happen in there.
            pass

        self.publish(object)

        # Periodic commit keeps the transaction (and memory) bounded.
        if count % 100 == 0:
            transaction.commit()

    log.info('documents done')
    return count
|
|
|
|
def sync_blocked_documents(self, timestamp=None):
    """Delete local copies of documents the source site has blocked.

    Fetches the (optionally timestamp-filtered) blocked-document listing
    and removes each matching local object from the folder corresponding
    to its portal_type.  Commits every 100 documents.  Returns None.
    """
    qs = ''
    if timestamp:
        qs = '?timestamp=%s' % timestamp
    log.info('loading blocked documents list from %s' % self.src_url)
    documents = json.load(self.urlopen('%s/@@listBlockedDocuments%s' % (self.src_url, qs)))
    log.info('loading blocked documents list from %s -> DONE' % self.src_url)

    for count, document_url in enumerate(documents):
        log.info('loading %s' % document_url)
        data = json.load(self.urlopen('%s/json' % document_url))

        new_id = data.get('id')
        # Delete from the folder that matches the source type, if present.
        if data.get('portal_type') in ('Question_ecriteD',
                'questionactualiteD', 'QuestionoraleD',
                'interpellationD'):
            if hasattr(self.questions_folder, new_id):
                self.questions_folder.manage_delObjects([new_id])
        elif data.get('portal_type') in ('convocation_commissionD',
                'convocation_seanceD'):
            # BUG FIX: the condition used to be inverted ('if not hasattr'),
            # which attempted to delete convocations that did NOT exist and
            # never deleted the ones that did.
            if hasattr(self.convocations_folder, new_id):
                self.convocations_folder.manage_delObjects([new_id])
        else:
            if hasattr(self.documents_folder, new_id):
                self.documents_folder.manage_delObjects([new_id])

        # Periodic commit keeps the transaction bounded.
        if count % 100 == 0:
            transaction.commit()

    log.info('blocked documents done')
|
|
|
|
def sync_deleted_documents(self, *args):
    """Purge local documents that no longer exist on the source site.

    Compares the local documents folder against the remote listing.
    With delete=yes in the request the stale objects are removed and
    'OK' is returned; otherwise the ids that would be purged are
    returned as a JSON list (dry run).
    """
    remote_urls = json.load(self.urlopen('%s/@@listDocuments' % self.src_url))
    # The listing gives full URLs; the document id is the last segment.
    remote_ids = set(url.split('/')[-1] for url in remote_urls)
    local_ids = set(doc_id for doc_id in self.documents_folder)
    documents_to_purge = local_ids - remote_ids

    if self.request.form.get('delete') != 'yes':
        # Dry run: report what would be deleted.
        return json.dumps(list(documents_to_purge))

    log.info('deleted documents sync')
    for doc_id in documents_to_purge:
        self.documents_folder.manage_delObjects([doc_id])

    log.info('deleted documents sync done')
    return 'OK'
|