PDF extraction error handling
This commit is contained in:
parent
c83cbd87e7
commit
7bc62cd896
Binary file not shown.
|
@ -17,26 +17,36 @@ pdf = sys.argv[1]
|
|||
|
||||
if __name__ == '__main__':
|
||||
input1 = PyPDF2.PdfFileReader(open(pdf, "rb"))
|
||||
page0 = input1.getPage(0)
|
||||
xObject = page0['/Resources']['/XObject'].getObject()
|
||||
page0 = input1.getPage(2)
|
||||
|
||||
for obj in xObject:
|
||||
if xObject[obj]['/Subtype'] == '/Image':
|
||||
size = (xObject[obj]['/Width'], xObject[obj]['/Height'])
|
||||
data = xObject[obj].getData()
|
||||
if xObject[obj]['/ColorSpace'] == '/DeviceRGB':
|
||||
mode = "RGB"
|
||||
else:
|
||||
mode = "P"
|
||||
if '/XObject' in page0['/Resources']:
|
||||
xObject = page0['/Resources']['/XObject'].getObject()
|
||||
|
||||
if xObject[obj]['/Filter'] == '/FlateDecode':
|
||||
img = Image.frombytes(mode, size, data)
|
||||
img.save(obj[1:] + ".png")
|
||||
elif xObject[obj]['/Filter'] == '/DCTDecode':
|
||||
img = open(obj[1:] + ".jpg", "wb")
|
||||
img.write(data)
|
||||
img.close()
|
||||
elif xObject[obj]['/Filter'] == '/JPXDecode':
|
||||
img = open(obj[1:] + ".jp2", "wb")
|
||||
img.write(data)
|
||||
img.close()
|
||||
print(xObject)
|
||||
for obj in xObject:
|
||||
print(xObject[obj])
|
||||
if xObject[obj]['/Subtype'] == '/Image':
|
||||
size = (xObject[obj]['/Width'], xObject[obj]['/Height'])
|
||||
data = xObject[obj].getData()
|
||||
if xObject[obj]['/ColorSpace'] == '/DeviceRGB':
|
||||
mode = "RGB"
|
||||
else:
|
||||
mode = "P"
|
||||
|
||||
if '/Filter' in xObject[obj]:
|
||||
if xObject[obj]['/Filter'] == '/FlateDecode':
|
||||
img = Image.frombytes(mode, size, data)
|
||||
img.save(obj[1:] + ".png")
|
||||
elif xObject[obj]['/Filter'] == '/DCTDecode':
|
||||
img = open(obj[1:] + ".jpg", "wb")
|
||||
img.write(data)
|
||||
img.close()
|
||||
elif xObject[obj]['/Filter'] == '/JPXDecode':
|
||||
img = open(obj[1:] + ".jp2", "wb")
|
||||
img.write(data)
|
||||
img.close()
|
||||
else:
|
||||
img = Image.frombytes(mode, size, data)
|
||||
img.save(obj[1:] + ".png")
|
||||
else:
|
||||
print("No image found.")
|
||||
|
|
Loading…
Reference in New Issue