summary refs log tree commit diff stats
path: root/ocrloader.py
diff options
context:
space:
mode:
author Frédéric Péters <fpeters@entrouvert.com> 2014-10-07 08:26:42 (GMT)
committer Frédéric Péters <fpeters@entrouvert.com> 2014-10-07 08:47:41 (GMT)
commit e64205c92f0b5765bd400d0b9ff0b09d2962fc1b (patch)
tree 7633b6aee1dc23629178fef96a9e0b2fd01d5cd3 /ocrloader.py
parent ddbcd61aa4f6863ae14198593f34e13b6b6ea14f (diff)
download pfwbged.ocrloader-e64205c92f0b5765bd400d0b9ff0b09d2962fc1b.zip
pfwbged.ocrloader-e64205c92f0b5765bd400d0b9ff0b09d2962fc1b.tar.gz
pfwbged.ocrloader-e64205c92f0b5765bd400d0b9ff0b09d2962fc1b.tar.bz2
allow uploading files from a directory (#4447)
Diffstat (limited to 'ocrloader.py')
-rwxr-xr-x ocrloader.py 30
1 files changed, 29 insertions, 1 deletions
diff --git a/ocrloader.py b/ocrloader.py
index a777f5c..551fcd7 100755
--- a/ocrloader.py
+++ b/ocrloader.py
@@ -109,9 +109,37 @@ while True:
cfg.read(config_filepath)
for section in cfg.sections():
logging.debug('processing %s', section)
+ ged_base_url = cfg.get(section, 'ged_base_url')
+
+ if section.startswith('/'):
+ # handle dropped files
+ for basedir, dirnames, filenames in os.walk(section):
+ for filename in filenames:
+ if filename.endswith('.uploaded'):
+ continue
+ filepath = os.path.realpath(os.path.join(basedir, filename))
+ if not filepath.startswith(basedir):
+ # check the real path, as an attacker could create a
+ # symlink to an arbitrary directory and cause its total
+ # destruction (as well as the upload of its contents
+ # to the GED).
+ logging.warn('wrong base dir for %s', filepath)
+ continue
+ payload = file(filepath).read()
+ logging.debug(' uploading file %s', filepath)
+ enable_ocr = (filename.startswith('ocr-') and filename.endswith('.pdf'))
+ if not process(dict(cfg.items(section)), filename, payload, enable_ocr):
+ logging.error(' error processing %s', filepath)
+ else:
+ if cfg.get(section, 'file_success_action') == 'delete':
+ os.unlink(filepath)
+ else:
+ os.rename(filepath, filepath + '.uploaded')
+ continue
+
+ # handle imap mailboxes
imap_server = cfg.get(section, 'imap_server')
ssl = cfg.getboolean(section, 'ssl')
- ged_base_url = cfg.get(section, 'ged_base_url')
try:
if ssl: