summary | refs | log | tree | commit | diff | stats
path: root/ocrloader.py
diff options
context:
space:
mode:
author: Frédéric Péters <fpeters@entrouvert.com> 2014-09-23 12:25:40 (GMT)
committer: Frédéric Péters <fpeters@entrouvert.com> 2014-09-23 12:25:40 (GMT)
commit: 410ea2902b8aa70b6198c1a8623a1b3a01d7576d (patch)
tree: 22518172e636a11008b230e7dcfad9e8556eb3e7 /ocrloader.py
parent: b559f9772219379e3c3e8ad23e6459829fea5e50 (diff)
download: pfwbged.ocrloader-410ea2902b8aa70b6198c1a8623a1b3a01d7576d.zip
pfwbged.ocrloader-410ea2902b8aa70b6198c1a8623a1b3a01d7576d.tar.gz
pfwbged.ocrloader-410ea2902b8aa70b6198c1a8623a1b3a01d7576d.tar.bz2
check ocr doesn't produce empty files
Diffstat (limited to 'ocrloader.py')
-rwxr-xr-x ocrloader.py 13
1 files changed, 10 insertions, 3 deletions
diff --git a/ocrloader.py b/ocrloader.py
index 5da4210..975a26d 100755
--- a/ocrloader.py
+++ b/ocrloader.py
@@ -44,16 +44,23 @@ def process(cfg, filename, payload, enable_ocr=True):
fd, tmpfilename = tempfile.mkstemp(suffix='.pdf', prefix='ocrloader-')
os.write(fd, payload)
os.close(fd)
- logging.debug(' running OCR on file')
- subprocess.call(['/opt/ABBYYOCR9/abbyyocr9',
+ logging.debug(' running OCR on file (%s / %s)' % (tmpfilename, ocr_filename))
+ cmd = ['/opt/ABBYYOCR9/abbyyocr9', '-pi',
'-rl', 'French', '-if', tmpfilename,
'-f', 'PDF', '-pem', 'ImageOnText', '-pfpr', '150',
- '-pfq', '100', '-of', ocr_filename])
+ '-pfq', '100', '-of', ocr_filename]
+ logging.debug(' %s' % ' '.join(cmd))
+ subprocess.call(cmd)
+ if os.stat(ocr_filename)[6] == 0:
+ os.unlink(ocr_filename)
if not os.path.exists(ocr_filename):
logging.error('failed to OCR %s', filename)
file('/tmp/' + filename, 'w').write(payload) # keep it for inspection
return False
+ if os.stat(ocr_filename)[6] == 0:
+ os.unlink(ocr_filename)
+
if cfg.get('store_path'):
logging.debug(' storing file locally')
shutil.copy(ocr_filename, os.path.join(cfg.get('store_path'), filename))