143 lines
5.7 KiB
Python
143 lines
5.7 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
import datetime
|
|
import os
|
|
import logging
|
|
|
|
from DateTime import DateTime
|
|
|
|
from Products.CMFCore.utils import getToolByName
|
|
from Products.Five.browser import BrowserView
|
|
from plone.namedfile.file import NamedBlobFile
|
|
|
|
import transaction
|
|
|
|
import themis.config.utils
|
|
|
|
log = logging.getLogger('Plone')
|
|
|
|
class LoadFromOcr(BrowserView):
    """Browser view importing the PDF files produced by the OCR system.

    Calling the view walks ``output_path`` (which can be overridden with
    the ``outputPath`` request parameter).  For every ``*.pdf`` file whose
    name has the form ``<code>_<number>_<YYYYMMDD>_<HHMMSS>.pdf`` it creates
    a content object in the folder configured for the matching document
    type, renames the file with a ``.processed`` suffix and commits the
    transaction.  Files that cannot be handled are logged and skipped.
    """

    # default directory scanned for OCR output
    output_path = '/mnt/gedimport'

    # document types tried, in this order, to resolve an OCR category code
    _doctypes = ('incoming_mails', 'outgoing_mails',
                 'internal_documents', 'confidential_documents')

    # document types whose title/id start with the category and mail number
    _mail_doctypes = ('incoming_mails', 'outgoing_mails')

    def get_folder(self, doctype):
        """Return the container in which ``doctype`` documents are created.

        The location given by ``themis.config.utils.get_ocr_location`` is
        split on '/' and walked attribute by attribute from the portal
        root; empty segments are ignored.

        Raises ``AttributeError`` when a path segment cannot be found.
        """
        folder = getToolByName(self.context, 'portal_url').getPortalObject()
        location = themis.config.utils.get_ocr_location(doctype)
        for part in location.split('/'):
            if part:
                folder = getattr(folder, part)
        return folder

    @staticmethod
    def _parse_ocr_date(date_part, time_part):
        """Build a datetime from the 'YYYYMMDD' and 'HHMMSS...' name parts.

        ``time_part`` still carries the file extension; only its first six
        characters are read.  Raises ``ValueError`` on malformed input.
        """
        return datetime.datetime(
            int(date_part[0:4]), int(date_part[4:6]), int(date_part[6:]),
            int(time_part[:2]), int(time_part[2:4]), int(time_part[4:6]))

    def _find_doctype(self, code_cat):
        """Return ``(doctype, category, subcategory)`` for an OCR code.

        Returns ``None`` when no known document type accepts the code.
        """
        for doctype in self._doctypes:
            try:
                category, subcategory = \
                    themis.config.utils.get_categories_from_ocr_code(
                        code_cat, doctype)
            except TypeError:
                # unpacking failed; presumably the helper returned None
                # because the code is unknown for this document type
                continue
            return doctype, category, subcategory
        return None

    @staticmethod
    def _creation_arguments(doctype, number, ocr_date, category, subcategory):
        """Return ``(factory, kwargs)`` used to create a ``doctype`` object.

        Returns ``None`` for a document type without a known factory.
        """
        if doctype == 'incoming_mails':
            return 'courrier_entrant', {
                'numero_courrier': number,
                'date_reception': ocr_date,
                'categorie_de_courrier': category,
                'sous_categorie_de_courrier': subcategory}
        if doctype == 'outgoing_mails':
            return 'courrier_sortant', {
                'numero_courrier': number,
                'date_envoi': ocr_date,
                'categorie_de_courrier': category,
                'sous_categorie_de_courrier': subcategory}
        if doctype == 'internal_documents':
            return 'document_interne', {
                'no_du_dossier': number,
                'categorie': category}
        if doctype == 'confidential_documents':
            return 'document_confidentiel', {
                'no_du_dossier': number,
                'categorie': category}
        return None

    def _import_file(self, plone_utils, base, filename):
        """Create the content object for one PDF file found in ``base``.

        Problems specific to the file (bad name, unknown category code,
        invalid date, already existing id) are logged as warnings and the
        file is left untouched.
        """
        log.info('processing %s', filename)
        try:
            code_cat, number, date_part, time_part = filename.split('_')
        except ValueError:
            log.warning('unknown file name format (%s)', filename)
            return

        found = self._find_doctype(code_cat)
        if found is None:
            log.warning('no suitable document type found for %s', filename)
            return
        doctype, category, subcategory = found

        folder = self.get_folder(doctype)

        try:
            ocr_date = self._parse_ocr_date(date_part, time_part)
        except ValueError:
            # a single badly named file must not abort the whole import
            log.warning('invalid date or time in file name (%s)', filename)
            return

        if doctype in self._mail_doctypes:
            subject = u'%s %s' % ((category or subcategory), number)
        else:
            subject = u'Document %s' % number
        ocr_title = u'%s du %s à %s' % (
            subject,
            ocr_date.strftime('%d/%m/%Y'),
            ocr_date.strftime('%H:%M:%S'))
        # the id uses separators that survive normalization
        ocr_id = plone_utils.normalizeString(
            u'%s du %s à %s' % (
                subject,
                ocr_date.strftime('%Y-%m-%d'),
                ocr_date.strftime('%H-%M-%S')))

        # NOTE(review): hasattr() may also match names reachable through
        # acquisition or regular attributes of the folder, not only its
        # own children -- confirm this is the intended check.
        if hasattr(folder, ocr_id):
            log.warning('document id already exists (%s)', filename)
            return

        # the created objects receive categories as lists (or None)
        category = [category] if category else None
        subcategory = [subcategory] if subcategory else None

        creation = self._creation_arguments(
            doctype, number, ocr_date.date(), category, subcategory)
        if creation is None:
            log.warning('unknown document type (%s, %s)', doctype, filename)
            return
        factory, kwargs = creation

        path = os.path.join(base, filename)
        # PDF content is binary; the context manager closes the handle
        with open(path, 'rb') as pdf:
            ocr_file = NamedBlobFile(pdf.read(), filename=unicode(filename))

        folder.invokeFactory(factory, id=ocr_id, title=ocr_title,
                             fichier=ocr_file, **kwargs)

        # NOTE(review): the file is renamed before the commit; if the
        # commit fails the file stays marked as processed.
        os.rename(path, path + '.processed')
        transaction.commit()

    def __call__(self):
        """Import every PDF file found below the output path.

        Returns the string ``'OK'`` once the directory tree was walked.
        """
        # output path is the directory where the OCR system uploads the files
        # NOTE(review): 'outputPath' is taken from the request as is, so
        # a caller can point the import at any directory readable by the
        # server process -- confirm the view is suitably protected.
        self.output_path = self.request.form.get(
            'outputPath', self.output_path)
        plone_utils = getToolByName(self.context, 'plone_utils')

        for base, dirnames, filenames in os.walk(self.output_path):
            for filename in filenames:
                if not filename.lower().endswith('.pdf'):
                    continue
                self._import_file(plone_utils, base, filename)

        return 'OK'
|
|
|