Use the logging module instead of print statements for log output
This commit is contained in:
parent
82bbe28e8b
commit
e6e14eb7ba
|
@ -26,12 +26,15 @@ config_filepath = options.config
|
|||
|
||||
|
||||
import logging
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
logging.basicConfig(level=logging.DEBUG,
|
||||
filename='/var/log/pfwbged.ocrloader.log',
|
||||
format='%(asctime)s %(message)s')
|
||||
|
||||
def process(cfg, filename, payload, enable_ocr=True):
|
||||
ocr_filename = os.path.join(cfg.get('ocrized_directory'), filename)
|
||||
if not enable_ocr:
|
||||
if not os.path.exists(ocr_filename):
|
||||
print >> sys.stderr, ' skipping OCR phase'
|
||||
logging.debug(' skipping OCR phase')
|
||||
fd = file(ocr_filename, 'w')
|
||||
fd.write(payload)
|
||||
fd.close()
|
||||
|
@ -40,22 +43,22 @@ def process(cfg, filename, payload, enable_ocr=True):
|
|||
fd, tmpfilename = tempfile.mkstemp(suffix='.pdf', prefix='ocrloader-')
|
||||
os.write(fd, payload)
|
||||
os.close(fd)
|
||||
print >> sys.stderr, ' running OCR on file'
|
||||
logging.debug(' running OCR on file')
|
||||
subprocess.call(['/opt/ABBYYOCR9/abbyyocr9',
|
||||
'-rl', 'French', '-if', tmpfilename,
|
||||
'-f', 'PDF', '-pem', 'ImageOnText', '-pfpr', '150',
|
||||
'-pfq', '100', '-of', ocr_filename])
|
||||
if not os.path.exists(ocr_filename):
|
||||
print >> sys.stderr, 'failed to OCR %s' % filename
|
||||
logging.error('failed to OCR %s', filename)
|
||||
file('/tmp/' + filename, 'w').write(payload) # keep it for inspection
|
||||
return False
|
||||
|
||||
if cfg.get('store_path'):
|
||||
print ' storing file locally'
|
||||
logging.debug(' storing file locally')
|
||||
shutil.copy(ocr_filename, os.path.join(cfg.get('store_path'), filename))
|
||||
return True
|
||||
else:
|
||||
print ' uploading file'
|
||||
logging.debug(' uploading file')
|
||||
now = datetime.datetime.now()
|
||||
title = string.Template(cfg.get('title')).substitute(
|
||||
{'date': now.strftime('%d/%m/%Y'),
|
||||
|
@ -97,7 +100,7 @@ while True:
|
|||
cfg = ConfigParser.ConfigParser()
|
||||
cfg.read(config_filepath)
|
||||
for section in cfg.sections():
|
||||
print 'processing', section
|
||||
logging.debug('processing %s', section)
|
||||
imap_server = cfg.get(section, 'imap_server')
|
||||
ssl = cfg.getboolean(section, 'ssl')
|
||||
ged_base_url = cfg.get(section, 'ged_base_url')
|
||||
|
@ -108,7 +111,7 @@ while True:
|
|||
else:
|
||||
M = imaplib.IMAP4(host_imap_server)
|
||||
except:
|
||||
print 'failed to connect to imap server'
|
||||
logging.error('failed to connect to imap server')
|
||||
time.sleep(30)
|
||||
continue
|
||||
try:
|
||||
|
@ -126,14 +129,14 @@ while True:
|
|||
for part in msg.walk():
|
||||
if part.get_content_type() == 'application/pdf':
|
||||
filename = part.get_filename()
|
||||
print ' handling', filename
|
||||
logging.info(' handling %s', filename)
|
||||
payload = part.get_payload(decode=True)
|
||||
if not process(dict(cfg.items(section)), filename, payload, enable_ocr):
|
||||
print ' error -> marking as unseen'
|
||||
logging.error(' error -> marking as unseen')
|
||||
M.store(num, '-FLAGS', r'\Seen')
|
||||
break
|
||||
M.close()
|
||||
M.logout()
|
||||
|
||||
print 'waiting a bit', time.strftime('%Y-%m-%d %H:%M:%S')
|
||||
logging.debug('waiting a bit %s', time.strftime('%Y-%m-%d %H:%M:%S'))
|
||||
time.sleep(30)
|
||||
|
|
Reference in New Issue