diff --git a/ocrloader.ini b/ocrloader.ini
index 669360c..75e0a98 100644
--- a/ocrloader.ini
+++ b/ocrloader.ini
@@ -6,6 +6,7 @@ ged_username = admin
 ged_password = xxxxx
 ocrized_directory = /root/tests
 title = Document scanned on %(date)s at %(time)s
+file_success_action = move
 
 [xxxx-test-greffe@pfwb.be]
 password = xxxxxx
@@ -23,3 +24,9 @@ user = gaetan
 [xxxx-test2-gaetandeberdt@pfwb.be]
 password = xxxxxx
 store_path = /srv/ocr/DeberdtG
+
+[/srv/ocr/upload/DeberdtG]
+default_type = dmsdocument
+default_directory = Members/gaetan
+user = gaetan
+file_success_action = delete
diff --git a/ocrloader.py b/ocrloader.py
index a777f5c..551fcd7 100755
--- a/ocrloader.py
+++ b/ocrloader.py
@@ -109,9 +109,44 @@ while True:
     cfg.read(config_filepath)
     for section in cfg.sections():
         logging.debug('processing %s', section)
+        ged_base_url = cfg.get(section, 'ged_base_url')
+
+        if section.startswith('/'):
+            # handle dropped files: a section named after an absolute path
+            # is walked and every file found below it is uploaded.
+            # Resolve the root once, with a trailing separator, so the
+            # containment check compares canonical paths and a sibling
+            # directory sharing the same name prefix cannot pass it.
+            upload_root = os.path.join(os.path.realpath(section), '')
+            for basedir, dirnames, filenames in os.walk(section):
+                for filename in filenames:
+                    if filename.endswith('.uploaded'):
+                        # suffix added below after a successful upload
+                        continue
+                    filepath = os.path.realpath(os.path.join(basedir, filename))
+                    if not filepath.startswith(upload_root):
+                        # check the real path as an attacker could create a
+                        # symlink to whatever directory and cause total
+                        # destruction of it (as well as the upload of its
+                        # contents to the GED...).
+                        logging.warning('wrong base dir for %s', filepath)
+                        continue
+                    # binary read; the handle is closed before processing
+                    with open(filepath, 'rb') as fd:
+                        payload = fd.read()
+                    logging.debug(' uploading file %s', filepath)
+                    enable_ocr = (filename.startswith('ocr-') and filename.endswith('.pdf'))
+                    if not process(dict(cfg.items(section)), filename, payload, enable_ocr):
+                        logging.error(' error processing %s', filepath)
+                    elif cfg.get(section, 'file_success_action') == 'delete':
+                        os.unlink(filepath)
+                    else:
+                        os.rename(filepath, filepath + '.uploaded')
+            continue
+
+        # handle imap mailboxes
         imap_server = cfg.get(section, 'imap_server')
         ssl = cfg.getboolean(section, 'ssl')
-        ged_base_url = cfg.get(section, 'ged_base_url')
         try:
             if ssl: