summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Frédéric Péters <fpeters@entrouvert.com>, 2014-03-27 10:57:55 (GMT)
committer: Frédéric Péters <fpeters@entrouvert.com>, 2014-03-27 10:57:55 (GMT)
commit: e6e14eb7baccad1ea2d506539c45d1baad884708 (patch)
tree: 0d1885d117719ecc6f91094be292e80bd153ae52
parent: 82bbe28e8b169f1fdf4485bd7110249d86f87e6a (diff)
download: pfwbged.ocrloader-e6e14eb7baccad1ea2d506539c45d1baad884708.zip
pfwbged.ocrloader-e6e14eb7baccad1ea2d506539c45d1baad884708.tar.gz
pfwbged.ocrloader-e6e14eb7baccad1ea2d506539c45d1baad884708.tar.bz2
use logging module to log
-rwxr-xr-x [-rw-r--r--] ocrloader.py | 25
1 file changed, 14 insertions, 11 deletions
diff --git a/ocrloader.py b/ocrloader.py
index 81c449c..34a75fd 100644..100755
--- a/ocrloader.py
+++ b/ocrloader.py
@@ -26,12 +26,15 @@ config_filepath = options.config
import logging
-logging.basicConfig(level=logging.DEBUG)
+logging.basicConfig(level=logging.DEBUG,
+ filename='/var/log/pfwbged.ocrloader.log',
+ format='%(asctime)s %(message)s')
+
def process(cfg, filename, payload, enable_ocr=True):
ocr_filename = os.path.join(cfg.get('ocrized_directory'), filename)
if not enable_ocr:
if not os.path.exists(ocr_filename):
- print >> sys.stderr, ' skipping OCR phase'
+ logging.debug(' skipping OCR phase')
fd = file(ocr_filename, 'w')
fd.write(payload)
fd.close()
@@ -40,22 +43,22 @@ def process(cfg, filename, payload, enable_ocr=True):
fd, tmpfilename = tempfile.mkstemp(suffix='.pdf', prefix='ocrloader-')
os.write(fd, payload)
os.close(fd)
- print >> sys.stderr, ' running OCR on file'
+ logging.debug(' running OCR on file')
subprocess.call(['/opt/ABBYYOCR9/abbyyocr9',
'-rl', 'French', '-if', tmpfilename,
'-f', 'PDF', '-pem', 'ImageOnText', '-pfpr', '150',
'-pfq', '100', '-of', ocr_filename])
if not os.path.exists(ocr_filename):
- print >> sys.stderr, 'failed to OCR %s' % filename
+ logging.error('failed to OCR %s', filename)
file('/tmp/' + filename, 'w').write(payload) # keep it for inspection
return False
if cfg.get('store_path'):
- print ' storing file locally'
+ logging.debug(' storing file locally')
shutil.copy(ocr_filename, os.path.join(cfg.get('store_path'), filename))
return True
else:
- print ' uploading file'
+ logging.debug(' uploading file')
now = datetime.datetime.now()
title = string.Template(cfg.get('title')).substitute(
{'date': now.strftime('%d/%m/%Y'),
@@ -97,7 +100,7 @@ while True:
cfg = ConfigParser.ConfigParser()
cfg.read(config_filepath)
for section in cfg.sections():
- print 'processing', section
+ logging.debug('processing %s', section)
imap_server = cfg.get(section, 'imap_server')
ssl = cfg.getboolean(section, 'ssl')
ged_base_url = cfg.get(section, 'ged_base_url')
@@ -108,7 +111,7 @@ while True:
else:
M = imaplib.IMAP4(host_imap_server)
except:
- print 'failed to connect to imap server'
+ logging.error('failed to connect to imap server')
time.sleep(30)
continue
try:
@@ -126,14 +129,14 @@ while True:
for part in msg.walk():
if part.get_content_type() == 'application/pdf':
filename = part.get_filename()
- print ' handling', filename
+ logging.info(' handling %s', filename)
payload = part.get_payload(decode=True)
if not process(dict(cfg.items(section)), filename, payload, enable_ocr):
- print ' error -> marking as unseen'
+ logging.error(' error -> marking as unseen')
M.store(num, '-FLAGS', r'\Seen')
break
M.close()
M.logout()
- print 'waiting a bit', time.strftime('%Y-%m-%d %H:%M:%S')
+ logging.debug('waiting a bit %s', time.strftime('%Y-%m-%d %H:%M:%S'))
time.sleep(30)