# -*- coding: utf-8 -*-
#
# themis/ocrloader/ocrloader.py
#
# NOTE(review): the change set that introduced this module also touched
# themis/ocrloader/configure.zcml (it adds the ``xmlns:browser`` namespace,
# presumably to register the view below -- the registration markup itself
# is not visible here, so confirm name and permission against the ZCML).

import datetime
import logging
import os

from Products.CMFCore.utils import getToolByName
from Products.Five.browser import BrowserView
from plone.namedfile.file import NamedBlobFile

import transaction

import themis.config.utils

logger = logging.getLogger('themis.ocrloader')


def _parse_ocr_filename(filename):
    """Split an OCR output file name into its components.

    The expected shape is ``<code>_<number>_<YYYYMMDD>_<HHMMSS>...``; any
    trailing text after the six time digits (e.g. the ``.pdf`` extension)
    is ignored.

    Returns a ``(code_cat, number, date_reception)`` tuple, where
    ``date_reception`` is a naive ``datetime.datetime``, or ``None`` when
    the name does not have four ``_``-separated parts or the date/time
    digits do not form a valid timestamp.
    """
    parts = filename.split('_')
    if len(parts) != 4:
        return None
    code_cat, number, date_part, time_part = parts
    try:
        date_reception = datetime.datetime(
            int(date_part[0:4]), int(date_part[4:6]), int(date_part[6:]),
            int(time_part[:2]), int(time_part[2:4]), int(time_part[4:6]))
    except ValueError:
        # non-numeric digits or an impossible date/time
        return None
    return code_cat, number, date_reception


class LoadFromOcr(BrowserView):
    """Import the PDF files dropped by the OCR system as incoming mails.

    Calling the view walks ``output_path``, creates one
    ``courrier_entrant`` object per ``*.pdf`` file in the incoming mails
    folder, and renames each imported file to ``<name>.processed``.
    """

    # default directory where the OCR system uploads the files
    output_path = '/mnt/gedimport'

    def __call__(self):
        """Run the import and return the string ``'OK'``.

        The directory to scan can be overridden with the ``outputPath``
        request parameter.  Files whose name cannot be parsed, whose OCR
        code maps to no mail category, or whose computed id already exists
        in the target folder are skipped (and logged).
        """
        # NOTE(review): outputPath comes straight from the request, so
        # whoever is allowed to call this view chooses which server
        # directory gets scanned and whose PDFs get renamed -- confirm the
        # view is registered with a suitably restrictive permission.
        self.output_path = self.request.form.get('outputPath', self.output_path)
        portal = getToolByName(self.context, 'portal_url').getPortalObject()
        plone_utils = getToolByName(self.context, 'plone_utils')

        # walk down from the portal root to the incoming mails folder
        mail_folder = portal
        for part in themis.config.utils.get_incoming_mails_location().split('/'):
            if not part:
                continue
            mail_folder = getattr(mail_folder, part)

        for base, _dirnames, filenames in os.walk(self.output_path):
            for filename in filenames:
                if not filename.lower().endswith('.pdf'):
                    continue
                logger.info('processing %s', filename)

                parsed = _parse_ocr_filename(filename)
                if parsed is None:
                    logger.warning('skipping %s: unexpected file name format',
                                   filename)
                    continue
                code_cat, number, date_reception = parsed

                try:
                    categorie_de_courrier, sous_categorie_de_courrier = \
                        themis.config.utils.get_mail_categories_from_ocr_code(code_cat)
                except TypeError:
                    # the lookup did not return a (category, subcategory)
                    # pair -- presumably None for an unknown OCR code
                    logger.warning('skipping %s: no mail category for code %r',
                                   filename, code_cat)
                    continue

                category_label = (categorie_de_courrier or
                                  sous_categorie_de_courrier)
                mail_title = u'%s %s du %s à %s' % (
                    category_label,
                    number,
                    date_reception.strftime('%d/%m/%Y'),
                    date_reception.strftime('%H:%M:%S'))
                mail_id = plone_utils.normalizeString(
                    u'%s %s du %s à %s' % (
                        category_label,
                        number,
                        date_reception.strftime('%Y-%m-%d'),
                        date_reception.strftime('%H-%M-%S')))

                # NOTE(review): hasattr() on a Zope container may also
                # match names reached through acquisition; confirm whether
                # a containment-only check is wanted here.
                if hasattr(mail_folder, mail_id):
                    logger.info('skipping %s: %s already imported',
                                filename, mail_id)
                    continue

                # the content type is handed lists (or None when empty)
                if categorie_de_courrier:
                    categorie_de_courrier = [categorie_de_courrier]
                else:
                    categorie_de_courrier = None

                if sous_categorie_de_courrier:
                    sous_categorie_de_courrier = [sous_categorie_de_courrier]
                else:
                    sous_categorie_de_courrier = None

                source_path = os.path.join(base, filename)
                # binary mode + context manager: PDFs are not text, and the
                # handle must not stay open for the rest of the walk
                with open(source_path, 'rb') as pdf_file:
                    pdf_data = pdf_file.read()
                mail_file = NamedBlobFile(pdf_data, filename=unicode(filename))

                mail_folder.invokeFactory(
                    'courrier_entrant', id=mail_id, title=mail_title,
                    numero_courrier=number,
                    date_reception=date_reception,
                    categorie_de_courrier=categorie_de_courrier,
                    sous_categorie_de_courrier=sous_categorie_de_courrier,
                    fichier=mail_file)

                # Commit before marking the file as processed: if the
                # commit fails the PDF keeps its name and is picked up
                # again on the next run, instead of being silently lost.
                transaction.commit()
                os.rename(source_path, source_path + '.processed')

        return 'OK'