Check that OCR doesn't produce empty files
This commit is contained in:
parent
b559f97722
commit
410ea2902b
13
ocrloader.py
13
ocrloader.py
|
@ -44,16 +44,23 @@ def process(cfg, filename, payload, enable_ocr=True):
|
|||
fd, tmpfilename = tempfile.mkstemp(suffix='.pdf', prefix='ocrloader-')
|
||||
os.write(fd, payload)
|
||||
os.close(fd)
|
||||
logging.debug(' running OCR on file')
|
||||
subprocess.call(['/opt/ABBYYOCR9/abbyyocr9',
|
||||
logging.debug(' running OCR on file (%s / %s)' % (tmpfilename, ocr_filename))
|
||||
cmd = ['/opt/ABBYYOCR9/abbyyocr9', '-pi',
|
||||
'-rl', 'French', '-if', tmpfilename,
|
||||
'-f', 'PDF', '-pem', 'ImageOnText', '-pfpr', '150',
|
||||
'-pfq', '100', '-of', ocr_filename])
|
||||
'-pfq', '100', '-of', ocr_filename]
|
||||
logging.debug(' %s' % ' '.join(cmd))
|
||||
subprocess.call(cmd)
|
||||
if os.stat(ocr_filename)[6] == 0:
|
||||
os.unlink(ocr_filename)
|
||||
if not os.path.exists(ocr_filename):
|
||||
logging.error('failed to OCR %s', filename)
|
||||
file('/tmp/' + filename, 'w').write(payload) # keep it for inspection
|
||||
return False
|
||||
|
||||
if os.stat(ocr_filename)[6] == 0:
|
||||
os.unlink(ocr_filename)
|
||||
|
||||
if cfg.get('store_path'):
|
||||
logging.debug(' storing file locally')
|
||||
shutil.copy(ocr_filename, os.path.join(cfg.get('store_path'), filename))
|
||||
|
|
Reference in New Issue