debian-pdfrw/tests/test_roundtrip.py

139 lines
4.3 KiB
Python
Executable File

#! /usr/bin/env python
# A part of pdfrw (https://github.com/pmaupin/pdfrw)
# Copyright (C) 2015 Patrick Maupin, Austin, Texas
# MIT license -- See LICENSE.txt for details
'''
Run from the directory above like so:
python -m tests.test_roundtrip
A PDF that has been determined to be good or bad
should be added to expected.txt with either a good
checksum, or just the word "fail".
These tests are incomplete, but they allow us to try
out various PDFs. There is a collection of difficult
PDFs available on github.
In order to use them:
1) Insure that github.com/pmaupin/static_pdfs is on your path.
2) Use the imagemagick compare program to look at differences
between the static_pdfs/global directory and the tmp_results
directory after you run this.
'''
import os
import hashlib
import pdfrw
import static_pdfs
import expected
from pdfrw.py23_diffs import convert_store
try:
import unittest2 as unittest
except ImportError:
import unittest
class TestOnePdf(unittest.TestCase):
def roundtrip(self, testname, basename, srcf, decompress=False,
compress=False, repaginate=False):
dstd = os.path.join(expected.result_dir, testname)
if not os.path.exists(dstd):
os.makedirs(dstd)
dstf = os.path.join(dstd, basename)
hashfile = os.path.join(expected.result_dir, 'hashes.txt')
hashkey = '%s/%s' % (testname, basename)
hash = '------no-file-generated---------'
expects = expected.results[hashkey]
# If the test has been deliberately skipped,
# we are done. Otherwise, execute it even
# if we don't know about it yet, so we have
# results to compare.
result = 'fail'
size = 0
try:
if 'skip' in expects:
result = 'skip requested'
return self.skipTest(result)
elif 'xfail' in expects:
result = 'xfail requested'
return self.fail(result)
exists = os.path.exists(dstf)
if expects or not exists:
if exists:
os.remove(dstf)
trailer = pdfrw.PdfReader(srcf, decompress=decompress,
verbose=False)
if trailer.Encrypt:
result = 'skip -- encrypt'
hash = '------skip-encrypt-no-file------'
return self.skipTest('File encrypted')
writer = pdfrw.PdfWriter(dstf, compress=compress)
if repaginate:
writer.addpages(trailer.pages)
else:
writer.trailer = trailer
writer.write()
with open(dstf, 'rb') as f:
data = f.read()
size = len(data)
if data:
hash = hashlib.md5(data).hexdigest()
else:
os.remove(dstf)
if expects:
if len(expects) == 1:
expects, = expects
self.assertEqual(hash, expects)
else:
self.assertIn(hash, expects)
result = 'pass'
else:
result = 'skip'
self.skipTest('No hash available')
finally:
result = '%8d %-20s %s %s\n' % (size, result, hashkey, hash)
with open(hashfile, 'ab') as f:
f.write(convert_store(result))
def build_tests():
def test_closure(*args, **kw):
def test(self):
self.roundtrip(*args, **kw)
return test
for mytest, repaginate, decompress, compress in (
('simple', False, False, False),
('repaginate', True, False, False),
('decompress', False, True, False),
('compress', False, True, True),
):
for srcf in static_pdfs.pdffiles[0]:
basename = os.path.basename(srcf)
test_name = 'test_%s_%s' % (mytest, basename)
test = test_closure(mytest, basename, srcf,
repaginate=repaginate,
decompress=decompress,
compress=compress,
)
setattr(TestOnePdf, test_name, test)
build_tests()
def main():
unittest.main()
if __name__ == '__main__':
main()