Replace the print statement and the "log?" placeholders in the OCR loader
with real logging through the module-level 'Plone' logger.

NOTE(review): the whitespace of this patch was collapsed in transit. The
indentation and the blank context lines below are reconstructed from the
hunk headers -- confirm them against themis/ocrloader/ocrloader.py before
applying (or apply with whitespace-insensitive matching). The index hashes
predate the review fixes.

diff --git a/themis/ocrloader/ocrloader.py b/themis/ocrloader/ocrloader.py
index 1003959..74bc0c4 100644
--- a/themis/ocrloader/ocrloader.py
+++ b/themis/ocrloader/ocrloader.py
@@ -2,6 +2,7 @@
 
 import datetime
 import os
+import logging
 
 from Products.CMFCore.utils import getToolByName
 from Products.Five.browser import BrowserView
@@ -11,6 +12,8 @@ import transaction
 
 import themis.config.utils
 
+log = logging.getLogger('Plone')
+
 class LoadFromOcr(BrowserView):
     output_path = '/mnt/gedimport'
 
@@ -33,8 +36,13 @@ class LoadFromOcr(BrowserView):
         for filename in filenames:
             if not filename.lower().endswith('.pdf'):
                 continue
-            print 'processing', filename
-            code_cat, number, date, time = filename.split('_')
+            log.info('processing %s', filename)
+            try:
+                code_cat, number, date, time = filename.split('_')
+            except ValueError:
+                # name does not split into exactly four '_'-separated fields
+                log.warning('unknown file name format (%s)', filename)
+                continue
 
             for doctype in ('incoming_mails', 'outgoing_mails',
                             'internal_documents', 'confidential_documents'):
@@ -45,7 +53,7 @@ class LoadFromOcr(BrowserView):
                     continue
                 break
             else:
-                # XXX: log?
+                log.warning('no suitable document type found for %s', filename)
                 continue
 
             folder = self.get_folder(doctype)
@@ -78,7 +86,7 @@ class LoadFromOcr(BrowserView):
                     ocr_date.strftime('%H-%M-%S')))
 
             if hasattr(folder, ocr_id):
-                # already imported (log?)
+                log.warning('document id already exists (%s)', filename)
                 continue
 
             if category: