check that OCR doesn't produce empty files

This commit is contained in:
Frédéric Péters 2014-09-23 14:25:40 +02:00
parent b559f97722
commit 410ea2902b
1 changed file with 10 additions and 3 deletions

View File

@@ -44,16 +44,23 @@ def process(cfg, filename, payload, enable_ocr=True):
fd, tmpfilename = tempfile.mkstemp(suffix='.pdf', prefix='ocrloader-')
os.write(fd, payload)
os.close(fd)
logging.debug(' running OCR on file')
subprocess.call(['/opt/ABBYYOCR9/abbyyocr9',
logging.debug(' running OCR on file (%s / %s)' % (tmpfilename, ocr_filename))
cmd = ['/opt/ABBYYOCR9/abbyyocr9', '-pi',
'-rl', 'French', '-if', tmpfilename,
'-f', 'PDF', '-pem', 'ImageOnText', '-pfpr', '150',
'-pfq', '100', '-of', ocr_filename])
'-pfq', '100', '-of', ocr_filename]
logging.debug(' %s' % ' '.join(cmd))
subprocess.call(cmd)
if os.stat(ocr_filename)[6] == 0:
os.unlink(ocr_filename)
if not os.path.exists(ocr_filename):
logging.error('failed to OCR %s', filename)
file('/tmp/' + filename, 'w').write(payload) # keep it for inspection
return False
if os.stat(ocr_filename)[6] == 0:
os.unlink(ocr_filename)
if cfg.get('store_path'):
logging.debug(' storing file locally')
shutil.copy(ocr_filename, os.path.join(cfg.get('store_path'), filename))