summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Frédéric Péters <fpeters@entrouvert.com> 2014-10-07 08:26:42 (GMT)
committer Frédéric Péters <fpeters@entrouvert.com> 2014-10-07 08:47:41 (GMT)
commit e64205c92f0b5765bd400d0b9ff0b09d2962fc1b (patch)
tree 7633b6aee1dc23629178fef96a9e0b2fd01d5cd3
parent ddbcd61aa4f6863ae14198593f34e13b6b6ea14f (diff)
download pfwbged.ocrloader-e64205c92f0b5765bd400d0b9ff0b09d2962fc1b.zip
pfwbged.ocrloader-e64205c92f0b5765bd400d0b9ff0b09d2962fc1b.tar.gz
pfwbged.ocrloader-e64205c92f0b5765bd400d0b9ff0b09d2962fc1b.tar.bz2
allow uploading files from a directory (#4447)
-rw-r--r-- ocrloader.ini 7
-rwxr-xr-x ocrloader.py 30
2 files changed, 36 insertions, 1 deletion
diff --git a/ocrloader.ini b/ocrloader.ini
index 669360c..75e0a98 100644
--- a/ocrloader.ini
+++ b/ocrloader.ini
@@ -6,6 +6,7 @@ ged_username = admin
ged_password = xxxxx
ocrized_directory = /root/tests
title = Document scanned on %(date)s at %(time)s
+file_success_action = move
[xxxx-test-greffe@pfwb.be]
password = xxxxxx
@@ -23,3 +24,9 @@ user = gaetan
[xxxx-test2-gaetandeberdt@pfwb.be]
password = xxxxxx
store_path = /srv/ocr/DeberdtG
+
+[/srv/ocr/upload/DeberdtG]
+default_type = dmsdocument
+default_directory = Members/gaetan
+user = gaetan
+file_success_action = delete
diff --git a/ocrloader.py b/ocrloader.py
index a777f5c..551fcd7 100755
--- a/ocrloader.py
+++ b/ocrloader.py
@@ -109,9 +109,37 @@ while True:
cfg.read(config_filepath)
for section in cfg.sections():
logging.debug('processing %s', section)
+ ged_base_url = cfg.get(section, 'ged_base_url')
+
+ if section.startswith('/'):
+ # handle dropped files
+ for basedir, dirnames, filenames in os.walk(section):
+ for filename in filenames:
+ if filename.endswith('.uploaded'):
+ continue
+ filepath = os.path.realpath(os.path.join(basedir, filename))
+ if not filepath.startswith(basedir):
+ # check the real path as an attacker could create a
+ # symlink to whatever directory and cause total
+ # destruction of it. (as well as the upload of its
+ # contents to the GED...).
+ logging.warn('wrong base dir for %s', filepath)
+ continue
+ payload = file(filepath).read()
+ logging.debug(' uploading file %s', filepath)
+ enable_ocr = (filename.startswith('ocr-') and filename.endswith('.pdf'))
+ if not process(dict(cfg.items(section)), filename, payload, enable_ocr):
+ logging.error(' error processing %s', filepath)
+ else:
+ if cfg.get(section, 'file_success_action') == 'delete':
+ os.unlink(filepath)
+ else:
+ os.rename(filepath, filepath + '.uploaded')
+ continue
+
+ # handle imap mailboxes
imap_server = cfg.get(section, 'imap_server')
ssl = cfg.getboolean(section, 'ssl')
- ged_base_url = cfg.get(section, 'ged_base_url')
try:
if ssl: