PDF extraction error handling

This commit is contained in:
Sylvain Pelissier 2016-01-13 09:16:56 +01:00
parent c83cbd87e7
commit 7bc62cd896
2 changed files with 31 additions and 21 deletions

Binary file not shown.

View File

@ -17,26 +17,36 @@ pdf = sys.argv[1]
if __name__ == '__main__':
input1 = PyPDF2.PdfFileReader(open(pdf, "rb"))
page0 = input1.getPage(0)
xObject = page0['/Resources']['/XObject'].getObject()
page0 = input1.getPage(2)
for obj in xObject:
if xObject[obj]['/Subtype'] == '/Image':
size = (xObject[obj]['/Width'], xObject[obj]['/Height'])
data = xObject[obj].getData()
if xObject[obj]['/ColorSpace'] == '/DeviceRGB':
mode = "RGB"
else:
mode = "P"
if '/XObject' in page0['/Resources']:
xObject = page0['/Resources']['/XObject'].getObject()
if xObject[obj]['/Filter'] == '/FlateDecode':
img = Image.frombytes(mode, size, data)
img.save(obj[1:] + ".png")
elif xObject[obj]['/Filter'] == '/DCTDecode':
img = open(obj[1:] + ".jpg", "wb")
img.write(data)
img.close()
elif xObject[obj]['/Filter'] == '/JPXDecode':
img = open(obj[1:] + ".jp2", "wb")
img.write(data)
img.close()
print(xObject)
for obj in xObject:
print(xObject[obj])
if xObject[obj]['/Subtype'] == '/Image':
size = (xObject[obj]['/Width'], xObject[obj]['/Height'])
data = xObject[obj].getData()
if xObject[obj]['/ColorSpace'] == '/DeviceRGB':
mode = "RGB"
else:
mode = "P"
if '/Filter' in xObject[obj]:
if xObject[obj]['/Filter'] == '/FlateDecode':
img = Image.frombytes(mode, size, data)
img.save(obj[1:] + ".png")
elif xObject[obj]['/Filter'] == '/DCTDecode':
img = open(obj[1:] + ".jpg", "wb")
img.write(data)
img.close()
elif xObject[obj]['/Filter'] == '/JPXDecode':
img = open(obj[1:] + ".jp2", "wb")
img.write(data)
img.close()
else:
img = Image.frombytes(mode, size, data)
img.save(obj[1:] + ".png")
else:
print("No image found.")