Started work on a test suite and added a test for loading and decoding a PDF file

- Added resources for the test
This commit is contained in:
Rob Oakes 2015-06-04 05:49:25 -04:00
parent 0a7b72d135
commit aa69bc95d7
4 changed files with 36 additions and 0 deletions

BIN
Resources/crazyones.pdf Normal file

Binary file not shown.

1
Resources/crazyones.txt Normal file
View File

@@ -0,0 +1 @@
TheCrazyOnesOctober14,1998Herestothecrazyones.Themis˝ts.Therebels.Thetroublemakers.Theroundpegsinthesquareholes.Theoneswhoseethingsdi˙erently.Theyrenotfondofrules.Andtheyhavenorespectforthestatusquo.Youcanquotethem,disagreewiththem,glorifyorvilifythem.Abouttheonlythingyoucantdoisignorethem.Becausetheychangethings.Theyinvent.Theyimagine.Theyheal.Theyexplore.Theycreate.Theyinspire.Theypushthehumanraceforward.Maybetheyhavetobecrazy.Howelsecanyoustareatanemptycanvasandseeaworkofart?Orsitinsilenceandhearasongthatsneverbeenwritten?Orgazeataredplanetandseealaboratoryonwheels?Wemaketoolsforthesekindsofpeople.Whilesomeseethemasthecrazyones,weseegenius.Becausethepeoplewhoarecrazyenoughtothinktheycanchangetheworld,aretheoneswhodo.

0
Tests/__init__.py Normal file
View File

35
Tests/tests.py Normal file
View File

@@ -0,0 +1,35 @@
import os, sys, unittest
# Configure path environment: every location is derived from this file's own
# path rather than from the current working directory.
# TESTS_ROOT is the absolute path of the directory containing this module.
TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
# PROJECT_ROOT is the parent directory of TESTS_ROOT.
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
# RESOURCE_ROOT is the 'Resources' directory directly under PROJECT_ROOT.
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, 'Resources')
# Presumably done so the PyPDF2 import below can resolve from the project
# checkout -- TODO confirm. Because the entry is appended (not inserted at the
# front), any PyPDF2 found earlier on sys.path takes precedence.
sys.path.append(PROJECT_ROOT)
# Test imports (must come after the sys.path change above)
# NOTE(review): unittest is already imported on the first line; this repeat is
# redundant but harmless.
import unittest
from PyPDF2 import PdfFileReader
class PdfReaderTestCases(unittest.TestCase):
    """Test cases exercising ``PdfFileReader`` against files in RESOURCE_ROOT."""

    def test_PdfReaderFileLoad(self):
        """Test loading and parsing of a file.

        Extracts the text of the first page of ``crazyones.pdf`` and compares
        it, encoded as UTF-8, to the contents of ``crazyones.txt``.

        Expected outcome: file loads, text matches expected.
        """
        pdf_path = os.path.join(RESOURCE_ROOT, 'crazyones.pdf')
        expected_path = os.path.join(RESOURCE_ROOT, 'crazyones.txt')

        with open(pdf_path, 'rb') as inputfile:
            # Load PDF file from file. The page is fetched and its text
            # extracted while the input stream is still open.
            ipdf = PdfFileReader(inputfile)
            ipdf_p1 = ipdf.getPage(0)

            # Retrieve the expected text. Binary mode is deliberate: the
            # extracted text is encoded to bytes below, so the fixture must be
            # bytes as well for the comparison to be meaningful on Python 3
            # (bytes never compare equal to str there). The context manager
            # guarantees the handle is closed even if the read fails.
            with open(expected_path, 'rb') as pdftext_file:
                pdftext = pdftext_file.read()

            # Encode once and reuse for both the comparison and the message.
            ipdf_p1_text = ipdf_p1.extractText().encode('utf-8', errors='ignore')

            # Compare the text of the PDF to a known source
            self.assertEqual(
                ipdf_p1_text,
                pdftext,
                msg='PDF extracted text differs from expected value.\n\nExpected:\n\n%r\n\nExtracted:\n\n%r\n\n'
                    % (pdftext, ipdf_p1_text),
            )