PEP8 fixes: mixed tabs / spaces, blank lines.
This commit is contained in:
parent
8cbca8c882
commit
164707a64b
|
@ -1,2 +1 @@
|
|||
__version__ = '1.23'
|
||||
|
||||
|
|
|
@ -43,25 +43,31 @@ else:
|
|||
|
||||
try:
|
||||
import zlib
|
||||
|
||||
def decompress(data):
|
||||
return zlib.decompress(data)
|
||||
|
||||
def compress(data):
|
||||
return zlib.compress(data)
|
||||
|
||||
except ImportError:
|
||||
# Unable to import zlib. Attempt to use the System.IO.Compression
|
||||
# library from the .NET framework. (IronPython only)
|
||||
import System
|
||||
from System import IO, Collections, Array
|
||||
|
||||
def _string_to_bytearr(buf):
|
||||
retval = Array.CreateInstance(System.Byte, len(buf))
|
||||
for i in range(len(buf)):
|
||||
retval[i] = ord(buf[i])
|
||||
return retval
|
||||
|
||||
def _bytearr_to_string(bytes):
|
||||
retval = ""
|
||||
for i in range(bytes.Length):
|
||||
retval += chr(bytes[i])
|
||||
return retval
|
||||
|
||||
def _read_bytes(stream):
|
||||
ms = IO.MemoryStream()
|
||||
buf = Array.CreateInstance(System.Byte, 2048)
|
||||
|
@ -74,6 +80,7 @@ except ImportError:
|
|||
retval = ms.ToArray()
|
||||
ms.Close()
|
||||
return retval
|
||||
|
||||
def decompress(data):
|
||||
bytes = _string_to_bytearr(data)
|
||||
ms = IO.MemoryStream()
|
||||
|
@ -84,6 +91,7 @@ except ImportError:
|
|||
retval = _bytearr_to_string(bytes)
|
||||
gz.Close()
|
||||
return retval
|
||||
|
||||
def compress(data):
|
||||
bytes = _string_to_bytearr(data)
|
||||
ms = IO.MemoryStream()
|
||||
|
@ -106,7 +114,7 @@ class FlateDecode(object):
|
|||
predictor = decodeParms.get("/Predictor", 1)
|
||||
except AttributeError:
|
||||
pass # usually an array with a null object was read
|
||||
|
||||
|
||||
# predictor 1 == no predictor
|
||||
if predictor != 1:
|
||||
columns = decodeParms["/Columns"]
|
||||
|
@ -144,6 +152,7 @@ class FlateDecode(object):
|
|||
return compress(data)
|
||||
encode = staticmethod(encode)
|
||||
|
||||
|
||||
class ASCIIHexDecode(object):
|
||||
def decode(data, decodeParms=None):
|
||||
retval = ""
|
||||
|
@ -165,6 +174,7 @@ class ASCIIHexDecode(object):
|
|||
return retval
|
||||
decode = staticmethod(decode)
|
||||
|
||||
|
||||
class LZWDecode(object):
|
||||
"""Taken from:
|
||||
http://www.java2s.com/Open-Source/Java-Document/PDF/PDF-Renderer/com/sun/pdfview/decode/LZWDecode.java.htm
|
||||
|
@ -184,7 +194,6 @@ class LZWDecode(object):
|
|||
def resetDict(self):
|
||||
self.dictlen=258
|
||||
self.bitspercode=9
|
||||
|
||||
|
||||
def nextCode(self):
|
||||
fillbits=self.bitspercode
|
||||
|
@ -196,8 +205,8 @@ class LZWDecode(object):
|
|||
bitsfromhere=8-self.bitpos
|
||||
if bitsfromhere>fillbits:
|
||||
bitsfromhere=fillbits
|
||||
value |= (((nextbits >> (8-self.bitpos-bitsfromhere)) &
|
||||
(0xff >> (8-bitsfromhere))) <<
|
||||
value |= (((nextbits >> (8-self.bitpos-bitsfromhere)) &
|
||||
(0xff >> (8-bitsfromhere))) <<
|
||||
(fillbits-bitsfromhere))
|
||||
fillbits -= bitsfromhere
|
||||
self.bitpos += bitsfromhere
|
||||
|
@ -235,17 +244,16 @@ class LZWDecode(object):
|
|||
baos+=p
|
||||
self.dict[self.dictlen] = p;
|
||||
self.dictlen+=1
|
||||
if (self.dictlen >= (1 << self.bitspercode) - 1 and
|
||||
if (self.dictlen >= (1 << self.bitspercode) - 1 and
|
||||
self.bitspercode < 12):
|
||||
self.bitspercode+=1
|
||||
return baos
|
||||
|
||||
|
||||
|
||||
@staticmethod
|
||||
def decode(data,decodeParams=None):
|
||||
return LZWDecode.decoder(data).decode()
|
||||
|
||||
|
||||
class ASCII85Decode(object):
|
||||
def decode(data, decodeParms=None):
|
||||
retval = ""
|
||||
|
@ -299,6 +307,7 @@ class ASCII85Decode(object):
|
|||
return retval
|
||||
decode = staticmethod(decode)
|
||||
|
||||
|
||||
def decodeStreamData(stream):
|
||||
from .generic import NameObject
|
||||
filters = stream.get("/Filter", ())
|
||||
|
|
|
@ -48,6 +48,8 @@ import codecs
|
|||
ObjectPrefix = b_('/<[tf(n%')
|
||||
NumberSigns = b_('+-')
|
||||
IndirectPattern = re.compile(b_(r"(\d+)\s+(\d+)\s+R[^a-zA-Z]"))
|
||||
|
||||
|
||||
def readObject(stream, pdf):
|
||||
tok = stream.read(1)
|
||||
stream.seek(-1, 1) # reset to start
|
||||
|
@ -94,6 +96,7 @@ def readObject(stream, pdf):
|
|||
else:
|
||||
return NumberObject.readFromStream(stream)
|
||||
|
||||
|
||||
class PdfObject(object):
|
||||
def getObject(self):
|
||||
"""Resolves indirect references."""
|
||||
|
@ -225,6 +228,7 @@ class FloatObject(decimal.Decimal, PdfObject):
|
|||
return decimal.Decimal.__new__(cls, utils.str_(value), context)
|
||||
except:
|
||||
return decimal.Decimal.__new__(cls, str(value))
|
||||
|
||||
def __repr__(self):
|
||||
if self == self.to_integral():
|
||||
return str(self.quantize(decimal.Decimal(1)))
|
||||
|
@ -630,6 +634,7 @@ class DictionaryObject(dict, PdfObject):
|
|||
return retval
|
||||
readFromStream = staticmethod(readFromStream)
|
||||
|
||||
|
||||
class TreeObject(DictionaryObject):
|
||||
def __init__(self):
|
||||
DictionaryObject.__init__(self)
|
||||
|
@ -726,7 +731,6 @@ class TreeObject(DictionaryObject):
|
|||
found = True
|
||||
break
|
||||
|
||||
|
||||
prevRef = curRef
|
||||
prev = cur
|
||||
if NameObject('/Next') in cur:
|
||||
|
@ -938,6 +942,7 @@ class RectangleObject(ArrayObject):
|
|||
in (x,y) form.
|
||||
"""
|
||||
|
||||
|
||||
class Field(TreeObject):
|
||||
"""
|
||||
A class representing a field dictionary. This class is accessed through
|
||||
|
@ -1009,6 +1014,7 @@ class Field(TreeObject):
|
|||
See Section 8.5.2 of the PDF 1.7 reference.
|
||||
"""
|
||||
|
||||
|
||||
class Destination(TreeObject):
|
||||
"""
|
||||
A class representing a destination within a PDF file.
|
||||
|
@ -1157,6 +1163,7 @@ def encode_pdfdocencoding(unicode_string):
|
|||
"does not exist in translation table")
|
||||
return retval
|
||||
|
||||
|
||||
def decode_pdfdocencoding(byte_array):
|
||||
retval = u_('')
|
||||
for b in byte_array:
|
||||
|
@ -1211,4 +1218,3 @@ for i in range(256):
|
|||
continue
|
||||
assert char not in _pdfDocEncoding_rev
|
||||
_pdfDocEncoding_rev[char] = i
|
||||
|
||||
|
|
174
PyPDF2/merger.py
174
PyPDF2/merger.py
|
@ -40,6 +40,7 @@ else:
|
|||
from io import FileIO as file
|
||||
StreamIO = BytesIO
|
||||
|
||||
|
||||
class _MergedPage(object):
|
||||
"""
|
||||
_MergedPage is used internally by PdfFileMerger to collect necessary
|
||||
|
@ -50,13 +51,14 @@ class _MergedPage(object):
|
|||
self.pagedata = pagedata
|
||||
self.out_pagedata = None
|
||||
self.id = id
|
||||
|
||||
|
||||
|
||||
class PdfFileMerger(object):
|
||||
"""
|
||||
Initializes a PdfFileMerger object. PdfFileMerger merges multiple PDFs
|
||||
into a single PDF. It can concatenate, slice, insert, or any combination
|
||||
of the above.
|
||||
|
||||
|
||||
See the functions :meth:`merge()<merge>` (or :meth:`append()<append>`)
|
||||
and :meth:`write()<write>` for usage information.
|
||||
|
||||
|
@ -64,7 +66,7 @@ class PdfFileMerger(object):
|
|||
problems and also causes some correctable problems to be fatal.
|
||||
Defaults to ``True``.
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self, strict=True):
|
||||
self.inputs = []
|
||||
self.pages = []
|
||||
|
@ -73,7 +75,7 @@ class PdfFileMerger(object):
|
|||
self.named_dests = []
|
||||
self.id_count = 0
|
||||
self.strict = strict
|
||||
|
||||
|
||||
def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True):
|
||||
"""
|
||||
Merges the pages from the given file into the output file at the
|
||||
|
@ -85,26 +87,26 @@ class PdfFileMerger(object):
|
|||
:param fileobj: A File Object or an object that supports the standard read
|
||||
and seek methods similar to a File Object. Could also be a
|
||||
string representing a path to a PDF file.
|
||||
|
||||
|
||||
:param str bookmark: Optionally, you may specify a bookmark to be applied at
|
||||
the beginning of the included file by supplying the text of the bookmark.
|
||||
|
||||
:param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
|
||||
to merge only the specified range of pages from the source
|
||||
document into the output document.
|
||||
|
||||
|
||||
:param bool import_bookmarks: You may prevent the source document's bookmarks
|
||||
from being imported by specifying this as ``False``.
|
||||
"""
|
||||
|
||||
|
||||
# This parameter is passed to self.inputs.append and means
|
||||
# that the stream used was created in this method.
|
||||
my_file = False
|
||||
|
||||
|
||||
# If the fileobj parameter is a string, assume it is a path
|
||||
# and create a file object at that location. If it is a file,
|
||||
# copy the file's contents into a BytesIO (or StreamIO) stream object; if
|
||||
# it is a PdfFileReader, copy that reader's stream into a
|
||||
# copy the file's contents into a BytesIO (or StreamIO) stream object; if
|
||||
# it is a PdfFileReader, copy that reader's stream into a
|
||||
# BytesIO (or StreamIO) stream.
|
||||
# If fileobj is none of the above types, it is not modified
|
||||
if type(fileobj) == string_type:
|
||||
|
@ -116,17 +118,17 @@ class PdfFileMerger(object):
|
|||
fileobj = StreamIO(filecontent)
|
||||
my_file = True
|
||||
elif isinstance(fileobj, PdfFileReader):
|
||||
orig_tell = fileobj.stream.tell()
|
||||
orig_tell = fileobj.stream.tell()
|
||||
fileobj.stream.seek(0)
|
||||
filecontent = StreamIO(fileobj.stream.read())
|
||||
fileobj.stream.seek(orig_tell) # reset the stream to its original location
|
||||
fileobj = filecontent
|
||||
my_file = True
|
||||
|
||||
|
||||
# Create a new PdfFileReader instance using the stream
|
||||
# (either file or BytesIO or StringIO) created above
|
||||
pdfr = PdfFileReader(fileobj, strict=self.strict)
|
||||
|
||||
|
||||
# Find the range of pages to merge.
|
||||
if pages == None:
|
||||
pages = (0, pdfr.getNumPages())
|
||||
|
@ -134,47 +136,45 @@ class PdfFileMerger(object):
|
|||
pages = pages.indices(pdfr.getNumPages())
|
||||
elif not isinstance(pages, tuple):
|
||||
raise TypeError('"pages" must be a tuple of (start, stop[, step])')
|
||||
|
||||
|
||||
srcpages = []
|
||||
if bookmark:
|
||||
bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit'))
|
||||
|
||||
|
||||
outline = []
|
||||
if import_bookmarks:
|
||||
outline = pdfr.getOutlines()
|
||||
outline = self._trim_outline(pdfr, outline, pages)
|
||||
|
||||
|
||||
if bookmark:
|
||||
self.bookmarks += [bookmark, outline]
|
||||
else:
|
||||
self.bookmarks += outline
|
||||
|
||||
|
||||
dests = pdfr.namedDestinations
|
||||
dests = self._trim_dests(pdfr, dests, pages)
|
||||
self.named_dests += dests
|
||||
|
||||
|
||||
# Gather all the pages that are going to be merged
|
||||
for i in range(*pages):
|
||||
pg = pdfr.getPage(i)
|
||||
|
||||
|
||||
id = self.id_count
|
||||
self.id_count += 1
|
||||
|
||||
|
||||
mp = _MergedPage(pg, pdfr, id)
|
||||
|
||||
|
||||
srcpages.append(mp)
|
||||
|
||||
self._associate_dests_to_pages(srcpages)
|
||||
self._associate_bookmarks_to_pages(srcpages)
|
||||
|
||||
|
||||
|
||||
# Slice to insert the pages at the specified position
|
||||
self.pages[position:position] = srcpages
|
||||
|
||||
|
||||
# Keep track of our input files so we can close them later
|
||||
self.inputs.append((fileobj, pdfr, my_file))
|
||||
|
||||
|
||||
|
||||
def append(self, fileobj, bookmark=None, pages=None, import_bookmarks=True):
|
||||
"""
|
||||
Identical to the :meth:`merge()<merge>` method, but assumes you want to concatenate
|
||||
|
@ -183,7 +183,7 @@ class PdfFileMerger(object):
|
|||
:param fileobj: A File Object or an object that supports the standard read
|
||||
and seek methods similar to a File Object. Could also be a
|
||||
string representing a path to a PDF file.
|
||||
|
||||
|
||||
:param str bookmark: Optionally, you may specify a bookmark to be applied at
|
||||
the beginning of the included file by supplying the text of the bookmark.
|
||||
|
||||
|
@ -194,10 +194,9 @@ class PdfFileMerger(object):
|
|||
:param bool import_bookmarks: You may prevent the source document's bookmarks
|
||||
from being imported by specifying this as ``False``.
|
||||
"""
|
||||
|
||||
|
||||
self.merge(len(self.pages), fileobj, bookmark, pages, import_bookmarks)
|
||||
|
||||
|
||||
|
||||
def write(self, fileobj):
|
||||
"""
|
||||
Writes all data that has been merged to the given output file.
|
||||
|
@ -210,7 +209,6 @@ class PdfFileMerger(object):
|
|||
fileobj = file(fileobj, 'wb')
|
||||
my_file = True
|
||||
|
||||
|
||||
# Add pages to the PdfFileWriter
|
||||
# The commented out line below was replaced with the two lines below it to allow PdfFileMerger to work with PyPdf 1.13
|
||||
for page in self.pages:
|
||||
|
@ -222,15 +220,13 @@ class PdfFileMerger(object):
|
|||
# Once all pages are added, create bookmarks to point at those pages
|
||||
self._write_dests()
|
||||
self._write_bookmarks()
|
||||
|
||||
# Write the output to the file
|
||||
|
||||
# Write the output to the file
|
||||
self.output.write(fileobj)
|
||||
|
||||
|
||||
if my_file:
|
||||
fileobj.close()
|
||||
|
||||
|
||||
|
||||
def close(self):
|
||||
"""
|
||||
Shuts all file descriptors (input and output) and clears all memory
|
||||
|
@ -240,7 +236,7 @@ class PdfFileMerger(object):
|
|||
for fo, pdfr, mine in self.inputs:
|
||||
if mine:
|
||||
fo.close()
|
||||
|
||||
|
||||
self.inputs = []
|
||||
self.output = None
|
||||
|
||||
|
@ -253,7 +249,7 @@ class PdfFileMerger(object):
|
|||
Example: ``{u'/Title': u'My title'}``
|
||||
"""
|
||||
self.output.addMetadata(infos)
|
||||
|
||||
|
||||
def setPageLayout(self, layout):
|
||||
"""
|
||||
Set the page layout
|
||||
|
@ -289,7 +285,7 @@ class PdfFileMerger(object):
|
|||
|
||||
def _trim_dests(self, pdf, dests, pages):
|
||||
"""
|
||||
Removes any named destinations that are not a part of the specified
|
||||
Removes any named destinations that are not a part of the specified
|
||||
page set.
|
||||
"""
|
||||
new_dests = []
|
||||
|
@ -302,10 +298,10 @@ class PdfFileMerger(object):
|
|||
new_dests.append(o)
|
||||
break
|
||||
return new_dests
|
||||
|
||||
|
||||
def _trim_outline(self, pdf, outline, pages):
|
||||
"""
|
||||
Removes any outline/bookmark entries that are not a part of the
|
||||
Removes any outline/bookmark entries that are not a part of the
|
||||
specified page set.
|
||||
"""
|
||||
new_outline = []
|
||||
|
@ -326,10 +322,10 @@ class PdfFileMerger(object):
|
|||
prev_header_added = True
|
||||
break
|
||||
return new_outline
|
||||
|
||||
|
||||
def _write_dests(self):
|
||||
dests = self.named_dests
|
||||
|
||||
|
||||
for v in dests:
|
||||
pageno = None
|
||||
pdf = None
|
||||
|
@ -342,19 +338,18 @@ class PdfFileMerger(object):
|
|||
break
|
||||
if pageno != None:
|
||||
self.output.addNamedDestinationObject(v)
|
||||
|
||||
|
||||
def _write_bookmarks(self, bookmarks=None, parent=None):
|
||||
|
||||
|
||||
if bookmarks == None:
|
||||
bookmarks = self.bookmarks
|
||||
|
||||
|
||||
last_added = None
|
||||
for b in bookmarks:
|
||||
if isinstance(b, list):
|
||||
self._write_bookmarks(b, last_added)
|
||||
continue
|
||||
|
||||
|
||||
pageno = None
|
||||
pdf = None
|
||||
if '/Page' in b:
|
||||
|
@ -410,31 +405,31 @@ class PdfFileMerger(object):
|
|||
del b['/Left'], b['/Right'], b['/Bottom'], b['/Top']
|
||||
|
||||
b[NameObject('/A')] = DictionaryObject({NameObject('/S'): NameObject('/GoTo'), NameObject('/D'): ArrayObject(args)})
|
||||
|
||||
|
||||
pageno = i
|
||||
pdf = p.src
|
||||
break
|
||||
if pageno != None:
|
||||
del b['/Page'], b['/Type']
|
||||
last_added = self.output.addBookmarkDict(b, parent)
|
||||
last_added = self.output.addBookmarkDict(b, parent)
|
||||
|
||||
def _associate_dests_to_pages(self, pages):
|
||||
for nd in self.named_dests:
|
||||
pageno = None
|
||||
np = nd['/Page']
|
||||
|
||||
|
||||
if isinstance(np, NumberObject):
|
||||
continue
|
||||
|
||||
|
||||
for p in pages:
|
||||
if np.getObject() == p.pagedata.getObject():
|
||||
pageno = p.id
|
||||
|
||||
|
||||
if pageno != None:
|
||||
nd[NameObject('/Page')] = NumberObject(pageno)
|
||||
else:
|
||||
raise ValueError("Unresolved named destination '%s'" % (nd['/Title'],))
|
||||
|
||||
|
||||
def _associate_bookmarks_to_pages(self, pages, bookmarks=None):
|
||||
if bookmarks == None:
|
||||
bookmarks = self.bookmarks
|
||||
|
@ -443,35 +438,35 @@ class PdfFileMerger(object):
|
|||
if isinstance(b, list):
|
||||
self._associate_bookmarks_to_pages(pages, b)
|
||||
continue
|
||||
|
||||
|
||||
pageno = None
|
||||
bp = b['/Page']
|
||||
|
||||
|
||||
if isinstance(bp, NumberObject):
|
||||
continue
|
||||
|
||||
|
||||
for p in pages:
|
||||
if bp.getObject() == p.pagedata.getObject():
|
||||
pageno = p.id
|
||||
|
||||
|
||||
if pageno != None:
|
||||
b[NameObject('/Page')] = NumberObject(pageno)
|
||||
else:
|
||||
raise ValueError("Unresolved bookmark '%s'" % (b['/Title'],))
|
||||
|
||||
|
||||
def findBookmark(self, bookmark, root=None):
|
||||
if root == None:
|
||||
root = self.bookmarks
|
||||
|
||||
for i, b in enumerate(root):
|
||||
if isinstance(b, list):
|
||||
res = self.findBookmark(bookmark, b)
|
||||
if res:
|
||||
return [i] + res
|
||||
elif b == bookmark or b['/Title'] == bookmark:
|
||||
return [i]
|
||||
|
||||
return None
|
||||
if root == None:
|
||||
root = self.bookmarks
|
||||
|
||||
for i, b in enumerate(root):
|
||||
if isinstance(b, list):
|
||||
res = self.findBookmark(bookmark, b)
|
||||
if res:
|
||||
return [i] + res
|
||||
elif b == bookmark or b['/Title'] == bookmark:
|
||||
return [i]
|
||||
|
||||
return None
|
||||
|
||||
def addBookmark(self, title, pagenum, parent=None):
|
||||
"""
|
||||
|
@ -483,28 +478,27 @@ class PdfFileMerger(object):
|
|||
bookmarks.
|
||||
"""
|
||||
if parent == None:
|
||||
iloc = [len(self.bookmarks)-1]
|
||||
iloc = [len(self.bookmarks)-1]
|
||||
elif isinstance(parent, list):
|
||||
iloc = parent
|
||||
iloc = parent
|
||||
else:
|
||||
iloc = self.findBookmark(parent)
|
||||
|
||||
iloc = self.findBookmark(parent)
|
||||
|
||||
dest = Bookmark(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))
|
||||
|
||||
|
||||
if parent == None:
|
||||
self.bookmarks.append(dest)
|
||||
self.bookmarks.append(dest)
|
||||
else:
|
||||
bmparent = self.bookmarks
|
||||
for i in iloc[:-1]:
|
||||
bmparent = bmparent[i]
|
||||
npos = iloc[-1]+1
|
||||
if npos < len(bmparent) and isinstance(bmparent[npos], list):
|
||||
bmparent[npos].append(dest)
|
||||
else:
|
||||
bmparent.insert(npos, [dest])
|
||||
bmparent = self.bookmarks
|
||||
for i in iloc[:-1]:
|
||||
bmparent = bmparent[i]
|
||||
npos = iloc[-1]+1
|
||||
if npos < len(bmparent) and isinstance(bmparent[npos], list):
|
||||
bmparent[npos].append(dest)
|
||||
else:
|
||||
bmparent.insert(npos, [dest])
|
||||
return dest
|
||||
|
||||
|
||||
|
||||
def addNamedDestination(self, title, pagenum):
|
||||
"""
|
||||
Add a destination to the output.
|
||||
|
@ -512,7 +506,7 @@ class PdfFileMerger(object):
|
|||
:param str title: Title to use
|
||||
:param int pagenum: Page number this destination points at.
|
||||
"""
|
||||
|
||||
|
||||
dest = Destination(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))
|
||||
self.named_dests.append(dest)
|
||||
|
||||
|
@ -523,12 +517,12 @@ class OutlinesObject(list):
|
|||
self.tree = tree
|
||||
self.pdf = pdf
|
||||
self.parent = parent
|
||||
|
||||
|
||||
def remove(self, index):
|
||||
obj = self[index]
|
||||
del self[index]
|
||||
self.tree.removeChild(obj)
|
||||
|
||||
|
||||
def add(self, title, pagenum):
|
||||
pageRef = self.pdf.getObject(self.pdf._pages)['/Kids'][pagenum]
|
||||
action = DictionaryObject()
|
||||
|
@ -547,7 +541,7 @@ class OutlinesObject(list):
|
|||
self.pdf._addObject(bookmark)
|
||||
|
||||
self.tree.addChild(bookmark)
|
||||
|
||||
|
||||
def removeAll(self):
|
||||
for child in [x for x in self.tree.children()]:
|
||||
self.tree.removeChild(child)
|
||||
|
|
|
@ -32,11 +32,11 @@ PAGE_RANGE_HELP = """Remember, page indices start with zero.
|
|||
::-1 all pages in reverse order.
|
||||
"""
|
||||
|
||||
|
||||
|
||||
class PageRange(object):
|
||||
"""
|
||||
"""
|
||||
A slice-like representation of a range of page indices,
|
||||
i.e. page numbers, only starting at zero.
|
||||
i.e. page numbers, only starting at zero.
|
||||
The syntax is like what you would put between brackets [ ].
|
||||
The slice is one of the few Python types that can't be subclassed,
|
||||
but this class converts to and from slices, and allows similar use.
|
||||
|
@ -46,7 +46,7 @@ class PageRange(object):
|
|||
o str() and repr() allow printing.
|
||||
o indices(n) is like slice.indices(n).
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self, arg):
|
||||
"""
|
||||
Initialize with either a slice -- giving the equivalent page range,
|
||||
|
@ -67,7 +67,7 @@ class PageRange(object):
|
|||
if isinstance(arg, PageRange):
|
||||
self._slice = arg.to_slice()
|
||||
return
|
||||
|
||||
|
||||
m = isinstance(arg, Str) and re.match(PAGE_RANGE_RE, arg)
|
||||
if not m:
|
||||
raise ParseError(arg)
|
||||
|
@ -77,13 +77,13 @@ class PageRange(object):
|
|||
stop = start + 1 if start != -1 else None
|
||||
self._slice = slice(start, stop)
|
||||
else:
|
||||
self._slice = slice(*[int(g) if g else None
|
||||
self._slice = slice(*[int(g) if g else None
|
||||
for g in m.group(4, 6, 8)])
|
||||
|
||||
|
||||
# Just formatting this when there is __doc__ for __init__
|
||||
if __init__.__doc__:
|
||||
__init__.__doc__ = __init__.__doc__.format(page_range_help=PAGE_RANGE_HELP)
|
||||
|
||||
|
||||
@staticmethod
|
||||
def valid(input):
|
||||
""" True if input is a valid initializer for a PageRange. """
|
||||
|
@ -95,7 +95,7 @@ class PageRange(object):
|
|||
def to_slice(self):
|
||||
""" Return the slice equivalent of this page range. """
|
||||
return self._slice
|
||||
|
||||
|
||||
def __str__(self):
|
||||
""" A string like "1:2:3". """
|
||||
s = self._slice
|
||||
|
@ -127,7 +127,7 @@ def parse_filename_page_ranges(args):
|
|||
"""
|
||||
Given a list of filenames and page ranges, return a list of
|
||||
(filename, page_range) pairs.
|
||||
First arg must be a filename; other ags are filenames, page-range
|
||||
First arg must be a filename; other ags are filenames, page-range
|
||||
expressions, slice objects, or PageRange objects.
|
||||
A filename not followed by a page range indicates all pages of the file.
|
||||
"""
|
||||
|
@ -146,7 +146,7 @@ def parse_filename_page_ranges(args):
|
|||
# New filename or end of list--do all of the previous file?
|
||||
if pdf_filename and not did_page_range:
|
||||
pairs.append( (pdf_filename, PAGE_RANGE_ALL) )
|
||||
|
||||
|
||||
pdf_filename = arg
|
||||
did_page_range = False
|
||||
return pairs
|
||||
|
|
|
@ -74,6 +74,7 @@ else:
|
|||
from hashlib import md5
|
||||
import uuid
|
||||
|
||||
|
||||
class PdfFileWriter(object):
|
||||
"""
|
||||
This class supports writing PDF files out, given pages produced by another
|
||||
|
@ -516,7 +517,6 @@ class PdfFileWriter(object):
|
|||
|
||||
return bookmarkRef
|
||||
|
||||
|
||||
def addBookmark(self, title, pagenum, parent=None, color=None, bold=False, italic=False, fit='/Fit', *args):
|
||||
"""
|
||||
Add a bookmark to this PDF file.
|
||||
|
@ -553,7 +553,6 @@ class PdfFileWriter(object):
|
|||
if parent == None:
|
||||
parent = outlineRef
|
||||
|
||||
|
||||
bookmark = TreeObject()
|
||||
|
||||
bookmark.update({
|
||||
|
@ -871,6 +870,7 @@ class PdfFileWriter(object):
|
|||
"""Read and write property accessing the :meth:`getPageMode()<PdfFileWriter.getPageMode>`
|
||||
and :meth:`setPageMode()<PdfFileWriter.setPageMode>` methods."""
|
||||
|
||||
|
||||
class PdfFileReader(object):
|
||||
"""
|
||||
Initializes a PdfFileReader object. This operation can take some time, as
|
||||
|
@ -1347,7 +1347,6 @@ class PdfFileReader(object):
|
|||
if self.strict: raise utils.PdfReadError("This is a fatal error in strict mode.")
|
||||
return NullObject()
|
||||
|
||||
|
||||
def getObject(self, indirectReference):
|
||||
debug = False
|
||||
if debug: print(("looking at:", indirectReference.idnum, indirectReference.generation))
|
||||
|
@ -1580,6 +1579,7 @@ class PdfFileReader(object):
|
|||
assert len(entrySizes) >= 3
|
||||
if self.strict and len(entrySizes) > 3:
|
||||
raise utils.PdfReadError("Too many entry sizes: %s" %entrySizes)
|
||||
|
||||
def getEntry(i):
|
||||
# Reads the correct number of bytes for each entry. See the
|
||||
# discussion of the W parameter in PDF spec table 17.
|
||||
|
@ -1683,7 +1683,6 @@ class PdfFileReader(object):
|
|||
#if not, then either it's just plain wrong, or the non-zero-index is actually correct
|
||||
stream.seek(loc, 0) #return to where it was
|
||||
|
||||
|
||||
def _zeroXref(self, generation):
|
||||
self.xref[generation] = dict( (k-self.xrefIndex, v) for (k, v) in list(self.xref[generation].items()) )
|
||||
|
||||
|
@ -1827,14 +1826,17 @@ def getRectangle(self, name, defaults):
|
|||
setRectangle(self, name, retval)
|
||||
return retval
|
||||
|
||||
|
||||
def setRectangle(self, name, value):
|
||||
if not isinstance(name, NameObject):
|
||||
name = NameObject(name)
|
||||
self[name] = value
|
||||
|
||||
|
||||
def deleteRectangle(self, name):
|
||||
del self[name]
|
||||
|
||||
|
||||
def createRectangleAccessor(name, fallback):
|
||||
return \
|
||||
property(
|
||||
|
@ -1843,6 +1845,7 @@ def createRectangleAccessor(name, fallback):
|
|||
lambda self: deleteRectangle(self, name)
|
||||
)
|
||||
|
||||
|
||||
class PageObject(DictionaryObject):
|
||||
"""
|
||||
This class represents a single page within a PDF file. Typically this
|
||||
|
@ -2412,6 +2415,7 @@ class PageObject(DictionaryObject):
|
|||
page's creator.
|
||||
"""
|
||||
|
||||
|
||||
class ContentStream(DecodedStreamObject):
|
||||
def __init__(self, stream, pdf):
|
||||
self.pdf = pdf
|
||||
|
@ -2525,6 +2529,7 @@ class ContentStream(DecodedStreamObject):
|
|||
|
||||
_data = property(_getData, _setData)
|
||||
|
||||
|
||||
class DocumentInformation(DictionaryObject):
|
||||
"""
|
||||
A class representing the basic document metadata provided in a PDF File.
|
||||
|
@ -2588,6 +2593,7 @@ class DocumentInformation(DictionaryObject):
|
|||
producer_raw = property(lambda self: self.get("/Producer"))
|
||||
"""The "raw" version of producer; can return a ``ByteStringObject``."""
|
||||
|
||||
|
||||
def convertToInt(d, size):
|
||||
if size > 8:
|
||||
raise utils.PdfReadError("invalid size in convertToInt")
|
||||
|
@ -2600,6 +2606,7 @@ _encryption_padding = b_('\x28\xbf\x4e\x5e\x4e\x75\x8a\x41\x64\x00\x4e\x56') + \
|
|||
b_('\xff\xfa\x01\x08\x2e\x2e\x00\xb6\xd0\x68\x3e\x80\x2f\x0c') + \
|
||||
b_('\xa9\xfe\x64\x53\x69\x7a')
|
||||
|
||||
|
||||
# Implementation of algorithm 3.2 of the PDF standard security handler,
|
||||
# section 3.5.2 of the PDF 1.6 reference.
|
||||
def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt=True):
|
||||
|
@ -2643,6 +2650,7 @@ def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encr
|
|||
# entry.
|
||||
return md5_hash[:keylen]
|
||||
|
||||
|
||||
# Implementation of algorithm 3.3 of the PDF standard security handler,
|
||||
# section 3.5.2 of the PDF 1.6 reference.
|
||||
def _alg33(owner_pwd, user_pwd, rev, keylen):
|
||||
|
@ -2670,6 +2678,7 @@ def _alg33(owner_pwd, user_pwd, rev, keylen):
|
|||
# the /O entry in the encryption dictionary.
|
||||
return val
|
||||
|
||||
|
||||
# Steps 1-4 of algorithm 3.3
|
||||
def _alg33_1(password, rev, keylen):
|
||||
# 1. Pad or truncate the owner password string as described in step 1 of
|
||||
|
@ -2692,6 +2701,7 @@ def _alg33_1(password, rev, keylen):
|
|||
key = md5_hash[:keylen]
|
||||
return key
|
||||
|
||||
|
||||
# Implementation of algorithm 3.4 of the PDF standard security handler,
|
||||
# section 3.5.2 of the PDF 1.6 reference.
|
||||
def _alg34(password, owner_entry, p_entry, id1_entry):
|
||||
|
@ -2706,6 +2716,7 @@ def _alg34(password, owner_entry, p_entry, id1_entry):
|
|||
# encryption dictionary.
|
||||
return U, key
|
||||
|
||||
|
||||
# Implementation of algorithm 3.4 of the PDF standard security handler,
|
||||
# section 3.5.2 of the PDF 1.6 reference.
|
||||
def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt):
|
||||
|
|
|
@ -59,6 +59,7 @@ def formatWarning(message, category, filename, lineno, line=None):
|
|||
file = filename.replace("/", "\\").rsplit("\\", 1)[1] # find the file name
|
||||
return "%s: %s [%s:%s]\n" % (category.__name__, message, file, lineno)
|
||||
|
||||
|
||||
def readUntilWhitespace(stream, maxchars=None):
|
||||
"""
|
||||
Reads non-whitespace characters and returns them.
|
||||
|
@ -74,6 +75,7 @@ def readUntilWhitespace(stream, maxchars=None):
|
|||
break
|
||||
return txt
|
||||
|
||||
|
||||
def readNonWhitespace(stream):
|
||||
"""
|
||||
Finds and reads the next non-whitespace character (ignores whitespace).
|
||||
|
@ -83,6 +85,7 @@ def readNonWhitespace(stream):
|
|||
tok = stream.read(1)
|
||||
return tok
|
||||
|
||||
|
||||
def skipOverWhitespace(stream):
|
||||
"""
|
||||
Similar to readNonWhitespace, but returns a Boolean if more than
|
||||
|
@ -95,6 +98,7 @@ def skipOverWhitespace(stream):
|
|||
cnt+=1
|
||||
return (cnt > 1)
|
||||
|
||||
|
||||
def skipOverComment(stream):
|
||||
tok = stream.read(1)
|
||||
stream.seek(-1, 1)
|
||||
|
@ -102,6 +106,7 @@ def skipOverComment(stream):
|
|||
while tok not in (b_('\n'), b_('\r')):
|
||||
tok = stream.read(1)
|
||||
|
||||
|
||||
def readUntilRegex(stream, regex, ignore_eof=False):
|
||||
"""
|
||||
Reads until the regular expression pattern matched (ignore the match)
|
||||
|
@ -125,6 +130,7 @@ def readUntilRegex(stream, regex, ignore_eof=False):
|
|||
name += tok
|
||||
return name
|
||||
|
||||
|
||||
class ConvertFunctionsToVirtualList(object):
|
||||
def __init__(self, lengthFunction, getFunction):
|
||||
self.lengthFunction = lengthFunction
|
||||
|
@ -148,6 +154,7 @@ class ConvertFunctionsToVirtualList(object):
|
|||
raise IndexError("sequence index out of range")
|
||||
return self.getFunction(index)
|
||||
|
||||
|
||||
def RC4_encrypt(key, plaintext):
|
||||
S = [i for i in range(256)]
|
||||
j = 0
|
||||
|
@ -164,12 +171,14 @@ def RC4_encrypt(key, plaintext):
|
|||
retval += b_(chr(ord_(plaintext[x]) ^ t))
|
||||
return retval
|
||||
|
||||
|
||||
def matrixMultiply(a, b):
|
||||
return [[sum([float(i)*float(j)
|
||||
for i, j in zip(row, col)]
|
||||
) for col in zip(*b)]
|
||||
for row in a]
|
||||
|
||||
|
||||
def markLocation(stream):
|
||||
"""Creates text file showing current location in context."""
|
||||
# Mainly for debugging
|
||||
|
@ -182,18 +191,23 @@ def markLocation(stream):
|
|||
outputDoc.close()
|
||||
stream.seek(-RADIUS, 1)
|
||||
|
||||
|
||||
class PyPdfError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class PdfReadError(PyPdfError):
|
||||
pass
|
||||
|
||||
|
||||
class PageSizeNotDefinedError(PyPdfError):
|
||||
pass
|
||||
|
||||
|
||||
class PdfReadWarning(UserWarning):
|
||||
pass
|
||||
|
||||
|
||||
class PdfStreamError(PdfReadError):
|
||||
pass
|
||||
|
||||
|
@ -203,6 +217,7 @@ if sys.version_info[0] < 3:
|
|||
return s
|
||||
else:
|
||||
B_CACHE = {}
|
||||
|
||||
def b_(s):
|
||||
bc = B_CACHE
|
||||
if s in bc:
|
||||
|
@ -214,6 +229,8 @@ else:
|
|||
if len(s) < 2:
|
||||
bc[s] = r
|
||||
return r
|
||||
|
||||
|
||||
def u_(s):
|
||||
if sys.version_info[0] < 3:
|
||||
return unicode(s, 'unicode_escape')
|
||||
|
@ -230,24 +247,28 @@ def str_(b):
|
|||
else:
|
||||
return b
|
||||
|
||||
|
||||
def ord_(b):
|
||||
if sys.version_info[0] < 3 or type(b) == str:
|
||||
return ord(b)
|
||||
else:
|
||||
return b
|
||||
|
||||
|
||||
def chr_(c):
|
||||
if sys.version_info[0] < 3:
|
||||
return c
|
||||
else:
|
||||
return chr(c)
|
||||
|
||||
|
||||
def barray(b):
|
||||
if sys.version_info[0] < 3:
|
||||
return b
|
||||
else:
|
||||
return bytearray(b)
|
||||
|
||||
|
||||
def hexencode(b):
|
||||
if sys.version_info[0] < 3:
|
||||
return b.encode('hex')
|
||||
|
@ -256,6 +277,7 @@ def hexencode(b):
|
|||
coder = codecs.getencoder('hex_codec')
|
||||
return coder(b)[0]
|
||||
|
||||
|
||||
def hexStr(num):
|
||||
return hex(num).replace('L', '')
|
||||
|
||||
|
|
|
@ -50,6 +50,7 @@ iso8601 = re.compile("""
|
|||
)?
|
||||
""", re.VERBOSE)
|
||||
|
||||
|
||||
class XmpInformation(PdfObject):
|
||||
"""
|
||||
An object that represents Adobe XMP metadata.
|
||||
|
@ -355,5 +356,3 @@ class XmpInformation(PdfObject):
|
|||
:return: a dictionary of key/value items for custom metadata properties.
|
||||
:rtype: dict
|
||||
"""
|
||||
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@ from PyPDF2 import PdfFileWriter, PdfFileReader
|
|||
import sys
|
||||
import math
|
||||
|
||||
|
||||
def main():
|
||||
if (len(sys.argv) != 3):
|
||||
print("usage: python 2-up.py input_file output_file")
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
from PyPDF2 import PdfFileMerger
|
||||
|
||||
merger = PdfFileMerger()
|
||||
|
||||
|
||||
input1 = open("document1.pdf", "rb")
|
||||
input2 = open("document2.pdf", "rb")
|
||||
input3 = open("document3.pdf", "rb")
|
||||
|
|
|
@ -14,12 +14,13 @@ TEXT = """%s page %d of %d
|
|||
a wonderful file
|
||||
created with Sample_Code/makesimple.py"""
|
||||
|
||||
|
||||
def make_pdf_file(output_filename, np):
|
||||
title = output_filename
|
||||
c = canvas.Canvas(output_filename, pagesize=(8.5 * inch, 11 * inch))
|
||||
c.setStrokeColorRGB(0,0,0)
|
||||
c.setFillColorRGB(0,0,0)
|
||||
c.setFont("Helvetica", 12 * point)
|
||||
c.setFont("Helvetica", 12 * point)
|
||||
for pn in range(1, np + 1):
|
||||
v = 10 * inch
|
||||
for subtline in (TEXT % (output_filename, pn, np)).split( '\n' ):
|
||||
|
@ -27,7 +28,7 @@ def make_pdf_file(output_filename, np):
|
|||
v -= 12 * point
|
||||
c.showPage()
|
||||
c.save()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
nps = [None, 5, 11, 17]
|
||||
for i, np in enumerate(nps):
|
||||
|
|
|
@ -9,7 +9,7 @@ PAGE RANGES are like Python slices.
|
|||
{page_range_help}
|
||||
EXAMPLES
|
||||
pdfcat -o output.pdf head.pdf content.pdf :6 7: tail.pdf -1
|
||||
Concatenate all of head.pdf, all but page seven of content.pdf,
|
||||
Concatenate all of head.pdf, all but page seven of content.pdf,
|
||||
and the last page of tail.pdf, producing output.pdf.
|
||||
|
||||
pdfcat chapter*.pdf >book.pdf
|
||||
|
@ -26,6 +26,7 @@ from __future__ import print_function
|
|||
import argparse
|
||||
from PyPDF2.pagerange import PAGE_RANGE_HELP
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description=__doc__.format(page_range_help=PAGE_RANGE_HELP),
|
||||
|
@ -77,4 +78,3 @@ if __name__ == "__main__":
|
|||
merger.write(output)
|
||||
# In 3.0, input files must stay open until output is written.
|
||||
# Not closing the in_fs because this script exits now.
|
||||
|
||||
|
|
7
setup.py
7
setup.py
|
@ -5,7 +5,7 @@ import re
|
|||
|
||||
long_description = """
|
||||
A Pure-Python library built as a PDF toolkit. It is capable of:
|
||||
|
||||
|
||||
- extracting document information (title, author, ...)
|
||||
- splitting documents page by page
|
||||
- merging documents page by page
|
||||
|
@ -25,9 +25,9 @@ verstrline = open(VERSIONFILE, "rt").read()
|
|||
VSRE = r"^__version__ = ['\"]([^'\"]*)['\"]"
|
||||
mo = re.search(VSRE, verstrline, re.M)
|
||||
if mo:
|
||||
verstr = mo.group(1)
|
||||
verstr = mo.group(1)
|
||||
else:
|
||||
raise RuntimeError("Unable to find version string in %s." % (VERSIONFILE))
|
||||
raise RuntimeError("Unable to find version string in %s." % (VERSIONFILE))
|
||||
|
||||
setup(
|
||||
name="PyPDF2",
|
||||
|
@ -50,4 +50,3 @@ setup(
|
|||
],
|
||||
packages=["PyPDF2"],
|
||||
)
|
||||
|
||||
|
|
Loading…
Reference in New Issue