create incoming mails from ocr

This commit is contained in:
Frédéric Péters 2011-10-30 17:19:02 +01:00
parent 3c7e28b883
commit 7962243e43
2 changed files with 95 additions and 2 deletions

View File

@ -2,10 +2,16 @@
xmlns="http://namespaces.zope.org/zope"
xmlns:five="http://namespaces.zope.org/five"
xmlns:i18n="http://namespaces.zope.org/i18n"
xmlns:browser="http://namespaces.zope.org/browser"
i18n_domain="themis.ocrloader">
<!-- Register this package (".") with Five; ".initialize" is named as its
initialization hook. -->
<five:registerPackage package="." initialize=".initialize" />
<!-- -*- extra stuff goes here -*- -->
<!-- Browser view "ocrLoader", available on the Plone site root and
implemented by the LoadFromOcr class of the ocrloader module; access is
restricted to the cmf.ManagePortal permission. -->
<browser:view
name="ocrLoader"
for="Products.CMFPlone.interfaces.IPloneSiteRoot"
class=".ocrloader.LoadFromOcr"
permission="cmf.ManagePortal"
/>
</configure>

View File

@ -0,0 +1,87 @@
# -*- coding: utf-8 -*-
import datetime
import logging
import os

import transaction
from plone.namedfile.file import NamedBlobFile
from Products.CMFCore.utils import getToolByName
from Products.Five.browser import BrowserView

import themis.config.utils
class LoadFromOcr(BrowserView):
    """Create incoming mails from the PDF files uploaded by the OCR system.

    Calling the view walks ``output_path`` recursively.  Every ``*.pdf``
    file named ``<category code>_<number>_<YYYYMMDD>_<HHMMSS>.pdf`` becomes
    a ``courrier_entrant`` object in the incoming mails folder, after which
    the file is renamed with a ``.processed`` suffix so that later runs
    ignore it.
    """

    # Default directory where the OCR system uploads the files; a request
    # may override it with the ``outputPath`` form parameter.
    output_path = '/mnt/gedimport'

    def __call__(self):
        """Import all pending PDF files and return the string ``'OK'``.

        Files that cannot be handled (unexpected file name, unknown
        category code, mail already present) are logged and skipped; they
        do not interrupt the processing of the remaining files.
        """
        logger = logging.getLogger(__name__)

        # NOTE(review): the directory comes straight from the request, so
        # whoever may call this view can have an arbitrary directory walked
        # and its PDF files renamed -- confirm the view stays restricted to
        # trusted users.
        self.output_path = self.request.form.get(
            'outputPath', self.output_path)

        portal = getToolByName(self.context, 'portal_url').getPortalObject()
        plone_utils = getToolByName(self.context, 'plone_utils')
        mail_folder = self._get_mail_folder(portal)

        for base, _dirnames, filenames in os.walk(self.output_path):
            for filename in filenames:
                if not filename.lower().endswith('.pdf'):
                    continue
                logger.info('processing %s', filename)

                parsed = self._parse_filename(filename)
                if parsed is None:
                    logger.warning(
                        'skipping %s: unexpected file name', filename)
                    continue
                code_cat, number, date_reception = parsed

                try:
                    categorie_de_courrier, sous_categorie_de_courrier = \
                        themis.config.utils.get_mail_categories_from_ocr_code(
                            code_cat)
                except TypeError:
                    # presumably raised when the helper returns None for an
                    # unknown code and the unpacking fails -- TODO confirm
                    logger.warning(
                        'skipping %s: no mail category for OCR code %r',
                        filename, code_cat)
                    continue

                # The title and the id share the same label: the category
                # when there is one, the subcategory otherwise.
                label = categorie_de_courrier or sous_categorie_de_courrier
                mail_title = u'%s %s du %s à %s' % (
                    label,
                    number,
                    date_reception.strftime('%d/%m/%Y'),
                    date_reception.strftime('%H:%M:%S'))
                mail_id = plone_utils.normalizeString(
                    u'%s %s du %s à %s' % (
                        label,
                        number,
                        date_reception.strftime('%Y-%m-%d'),
                        date_reception.strftime('%H-%M-%S')))

                # NOTE(review): hasattr() on an acquisition-wrapped folder
                # may also match names acquired from parent objects --
                # confirm this cannot hide a mail that was never imported.
                if hasattr(mail_folder, mail_id):
                    logger.info(
                        'skipping %s: %s already imported', filename, mail_id)
                    continue

                # The content type receives lists (or None when empty).
                if categorie_de_courrier:
                    categorie_de_courrier = [categorie_de_courrier]
                else:
                    categorie_de_courrier = None
                if sous_categorie_de_courrier:
                    sous_categorie_de_courrier = [sous_categorie_de_courrier]
                else:
                    sous_categorie_de_courrier = None

                path = os.path.join(base, filename)
                # Binary mode and an explicit close: the PDF must be read
                # untouched and the handle must not leak for every file.
                with open(path, 'rb') as pdf:
                    mail_file = NamedBlobFile(
                        pdf.read(), filename=unicode(filename))

                mail_folder.invokeFactory(
                    'courrier_entrant', id=mail_id, title=mail_title,
                    numero_courrier=number,
                    date_reception=date_reception,
                    categorie_de_courrier=categorie_de_courrier,
                    sous_categorie_de_courrier=sous_categorie_de_courrier,
                    fichier=mail_file)

                # Commit before renaming: if the commit fails the file keeps
                # its name and is picked up again by the next run, instead
                # of being marked as processed without any mail stored.
                transaction.commit()
                os.rename(path, path + '.processed')
        return 'OK'

    def _get_mail_folder(self, portal):
        """Return the incoming mails folder, looked up from ``portal``.

        The location given by ``get_incoming_mails_location()`` is a
        ``/``-separated path; empty segments are ignored.
        """
        folder = portal
        location = themis.config.utils.get_incoming_mails_location()
        for part in location.split('/'):
            if not part:
                continue
            folder = getattr(folder, part)
        return folder

    @staticmethod
    def _parse_filename(filename):
        """Split an OCR file name into ``(code, number, reception date)``.

        Returns ``None`` when the name does not have exactly four
        ``_``-separated parts or when its date/time part is not a valid
        ``YYYYMMDD`` / ``HHMMSS`` value.
        """
        stem = os.path.splitext(filename)[0]
        parts = stem.split('_')
        if len(parts) != 4:
            return None
        code_cat, number, date_part, time_part = parts
        try:
            date_reception = datetime.datetime(
                int(date_part[0:4]), int(date_part[4:6]), int(date_part[6:]),
                int(time_part[:2]), int(time_part[2:4]), int(time_part[4:6]))
        except ValueError:
            return None
        return code_cat, number, date_reception