diff --git a/themis/ocrloader/ocrloader.py b/themis/ocrloader/ocrloader.py index f548ce9..62378b3 100644 --- a/themis/ocrloader/ocrloader.py +++ b/themis/ocrloader/ocrloader.py @@ -47,8 +47,9 @@ class LoadFromOcr(BrowserView): try: code_cat, number, date, time = filename.split('_') except ValueError: - # XXX: log log.warning('unknown file name format (%s)' % filename) + os.rename(os.path.join(base, filename), + os.path.join(base, filename + '.invalid')) continue for doctype in ('incoming_mails', 'outgoing_mails', @@ -61,6 +62,8 @@ class LoadFromOcr(BrowserView): break else: log.warning('no suitable document type found for %s' % filename) + os.rename(os.path.join(base, filename), + os.path.join(base, filename + '.invalid.nodoctype')) continue folder = self.get_folder(doctype, category_folder) @@ -94,6 +97,8 @@ class LoadFromOcr(BrowserView): if hasattr(folder, ocr_id): logging.warning('document id already exists (%s)' % filename) + os.rename(os.path.join(base, filename), + os.path.join(base, filename + '.invalid.already-exists')) continue if category: @@ -136,14 +141,30 @@ class LoadFromOcr(BrowserView): logging.warning('unknown document type (%s, %s)' % (doctype, filename)) continue - folder.invokeFactory(factory, id=ocr_id, title=ocr_title, - fichier=ocr_file, **kwargs) - os.rename(os.path.join(base, filename), - os.path.join(base, filename + '.processed')) - transaction.commit() + logging.info('invoking factory (%s) for %s (%r)' % (factory, ocr_id, ocr_title)) + oid = folder.invokeFactory(factory, id=ocr_id, title=ocr_title, + fichier=ocr_file, **kwargs) - object = getattr(folder, ocr_id) - notify(ObjectAddedEvent(object)) + logging.info('done, %s has been created' % ocr_id) + + transaction.savepoint() #optimistic=True) + logging.info('done, transaction.savepoint') + + try: + os.rename(os.path.join(base, filename), + os.path.join(base, filename + '.processed')) + except OSError as e: + logging.warning('failed to rename to processed (%s) (%r)' % (ocr_id, str(e))) + logging.info('rename done for %s' % ocr_id) + + try: + object = folder._getOb(oid) + except (AttributeError, KeyError): + logging.warning('failed to getattr object (%s)' % ocr_id) + else: + notify(ObjectAddedEvent(object)) + logging.info('notifying done for %s' % ocr_id) + return 'OK' return 'OK'