Add addJS method to PdfFileWriter

2014-07-02 15:50:22 +01:00 · 2014-07-02 15:50:22 +01:00 · f71e4979fd
parent 6e448ae1c9
commit f71e4979fd
2 changed files with 106 additions and 86 deletions
--- a/PyPDF2/pdf.py
+++ b/PyPDF2/pdf.py
@ -106,7 +106,8 @@ class PdfFileWriter(object):
            NameObject("/Type"): NameObject("/Catalog"),
            NameObject("/Pages"): self._pages,
            })
-        self._root = self._addObject(root)
+        self._root = None
+        self.root = root

    def _addObject(self, obj):
        self._objects.append(obj)
@ -209,6 +210,17 @@ class PdfFileWriter(object):
        self.insertPage(page, index)
        return page

+    def addJS(self, javascript):
+        js = DictionaryObject()
+        js.update({
+                NameObject("/Type"): NameObject("/Action"),
+                NameObject("/S"): NameObject("/JavaScript"),
+                NameObject("/JS"): NameObject("(%s)" % javascript)
+                })
+        self.root.update({
+                NameObject("/OpenAction"): self._addObject(js)
+                })
+
    def encrypt(self, user_pwd, owner_pwd = None, use_128bit = True):
        """
        Encrypt this PDF file with the PDF Standard encryption handler.
@ -268,6 +280,9 @@ class PdfFileWriter(object):
        debug = False
        import struct

+        if(not self._root):
+            self._root = self._addObject(self.root)
+
        externalReferenceMap = {}

        # PDF objects sometimes have circular references to their /Page objects
@ -333,7 +348,7 @@ class PdfFileWriter(object):
        if hasattr(self, "_encrypt"):
            trailer[NameObject("/Encrypt")] = self._encrypt
        trailer.writeToStream(stream, None)
-        
+
        # eof
        stream.write(b_("\nstartxref\n%s\n%%%%EOF\n" % (xref_location)))

@ -399,13 +414,13 @@ class PdfFileWriter(object):
                return newobj
        else:
            return data
-    
+
    def getReference(self, obj):
        """
        Build an indirect reference to an object already stored in this writer.

        The reference's id number is the object's 1-based position in
        ``self._objects``.

        :param obj: an object that is present in ``self._objects``.
        :return: an ``IndirectObject`` pointing at ``obj`` through this writer.
        """
        position = self._objects.index(obj)
        reference = IndirectObject(position + 1, 0, self)
        # Sanity check: the new reference must resolve back to the same object.
        assert reference.getObject() == obj
        return reference
-    
+
    def getOutlineRoot(self):
        root = self.getObject(self._root)

@ -413,15 +428,15 @@ class PdfFileWriter(object):
            outline = root['/Outlines']
            idnum = self._objects.index(outline) + 1
            outlineRef = IndirectObject(idnum, 0, self)
-            assert outlineRef.getObject() == outline 
+            assert outlineRef.getObject() == outline
        else:
-            outline = TreeObject() 
+            outline = TreeObject()
            outline.update({ })
            outlineRef = self._addObject(outline)
            root[NameObject('/Outlines')] = outlineRef
-            
+
        return outline
- 
+
    def getNamedDestRoot(self):
        root = self.getObject(self._root)

@ -429,12 +444,12 @@ class PdfFileWriter(object):
            names = root['/Names']
            idnum = self._objects.index(names) + 1
            namesRef = IndirectObject(idnum, 0, self)
-            assert namesRef.getObject() == names 
+            assert namesRef.getObject() == names
            if '/Dests' in names and isinstance(names['/Dests'], DictionaryObject):
                dests = names['/Dests']
                idnum = self._objects.index(dests) + 1
                destsRef = IndirectObject(idnum, 0, self)
-                assert destsRef.getObject() == dests 
+                assert destsRef.getObject() == dests
                if '/Names' in dests:
                    nd = dests['/Names']
                else:
@ -446,7 +461,7 @@ class PdfFileWriter(object):
                names[NameObject('/Dests')] = destsRef
                nd = ArrayObject()
                dests[NameObject('/Names')] = nd
-                
+
        else:
            names = DictionaryObject()
            namesRef = self._addObject(names)
@ -456,49 +471,49 @@ class PdfFileWriter(object):
            names[NameObject('/Dests')] = destsRef
            nd = ArrayObject()
            dests[NameObject('/Names')] = nd
-            
+
        return nd
-    
+
    def addBookmarkDestination(self, dest, parent=None):
        destRef = self._addObject(dest)

        outlineRef = self.getOutlineRoot()
-        
+
        if parent == None:
            parent = outlineRef

        parent = parent.getObject()
        #print parent.__class__.__name__
        parent.addChild(destRef, self)
-        
+
        return destRef
-    
+
    def addBookmarkDict(self, bookmark, parent=None):
        bookmarkObj = TreeObject()
        for k, v in list(bookmark.items()):
            bookmarkObj[NameObject(str(k))] = v
        bookmarkObj.update(bookmark)
-        
+
        if '/A' in bookmark:
            action = DictionaryObject()
            for k, v in list(bookmark['/A'].items()):
                action[NameObject(str(k))] = v
            actionRef = self._addObject(action)
            bookmarkObj[NameObject('/A')] = actionRef
-            
+
        bookmarkRef = self._addObject(bookmarkObj)

        outlineRef = self.getOutlineRoot()
-        
+
        if parent == None:
            parent = outlineRef
-        
+
        parent = parent.getObject()
        parent.addChild(bookmarkRef, self)
-        
-        return bookmarkRef       
-    
-            
+
+        return bookmarkRef
+
+
    def addBookmark(self, title, pagenum, parent=None):
        """
        Add a bookmark to this PDF file.
@ -517,10 +532,10 @@ class PdfFileWriter(object):
        actionRef = self._addObject(action)

        outlineRef = self.getOutlineRoot()
-        
+
        if parent == None:
            parent = outlineRef
-            
+

        bookmark = TreeObject()

@ -530,10 +545,10 @@ class PdfFileWriter(object):
        })

        bookmarkRef = self._addObject(bookmark)
-        
+
        parent = parent.getObject()
        parent.addChild(bookmarkRef, self)
-        
+
        return bookmarkRef

    def addNamedDestinationObject(self, dest):
@ -541,8 +556,8 @@ class PdfFileWriter(object):

        nd = self.getNamedDestRoot()
        nd.extend([dest['/Title'], destRef])
-        
-        return destRef      
+
+        return destRef

    def addNamedDestination(self, title, pagenum):
        pageRef = self.getObject(self._pages)['/Kids'][pagenum]
@ -551,12 +566,12 @@ class PdfFileWriter(object):
            NameObject('/D') : ArrayObject([pageRef, NameObject('/FitH'), NumberObject(826)]),
            NameObject('/S') : NameObject('/GoTo')
        })
-        
+
        destRef = self._addObject(dest)
        nd = self.getNamedDestRoot()

        nd.extend([title, destRef])
-        
+
        return destRef

    def removeLinks(self):
@ -714,7 +729,7 @@ class PdfFileWriter(object):
                borderArr.append(dashPattern)
        else:
            borderArr = [NumberObject(0)] * 3
-            
+
        if isinstance(rect, Str):
            rect = NameObject(rect)
        elif isinstance(rect, RectangleObject):
@ -739,12 +754,12 @@ class PdfFileWriter(object):
            pageRef[NameObject('/Annots')] = ArrayObject([lnkRef])

    _valid_layouts = ['/NoLayout', '/SinglePage', '/OneColumn', '/TwoColumnLeft', '/TwoColumnRight', '/TwoPageLeft', '/TwoPageRight']
-    
+
    def getPageLayout(self):
        """
        Get the page layout.
        See :meth:`setPageLayout()<PdfFileWriter.setPageLayout>` for a description of valid layouts.
-        
+
        :return: Page layout currently being used.
        :rtype: str, None if not specified
        """
@ -752,13 +767,13 @@ class PdfFileWriter(object):
            return self.getObject(self._root)['/PageLayout']
        except KeyError:
            return None
-        
+
    def setPageLayout(self, layout):
        """
        Set the page layout

        :param str layout: The page layout to be used
-        
+
        Valid layouts are:
             /NoLayout        Layout explicitly not specified
             /SinglePage      Show one page at a time
@ -774,7 +789,7 @@ class PdfFileWriter(object):
            layout = NameObject(layout)
        root = self.getObject(self._root)
        root.update({NameObject('/PageLayout'): layout})
-    
+
    pageLayout = property(getPageLayout, setPageLayout)
    """Read and write property accessing the :meth:`getPageLayout()<PdfFileWriter.getPageLayout>`
    and :meth:`setPageLayout()<PdfFileWriter.setPageLayout>` methods."""
@ -786,7 +801,7 @@ class PdfFileWriter(object):
        Get the page mode.
        See :meth:`setPageMode()<PdfFileWriter.setPageMode>` for a description
        of valid modes.
-        
+
        :return: Page mode currently being used.
        :rtype: str, None if not specified
        """
@ -800,7 +815,7 @@ class PdfFileWriter(object):
        Set the page mode.

        :param str mode: The page mode to use.
-        
+
        Valid modes are:
            /UseNone         Do not show outlines or thumbnails panels
            /UseOutlines     Show outlines (aka bookmarks) panel
@ -815,7 +830,7 @@ class PdfFileWriter(object):
            mode = NameObject(mode)
        root = self.getObject(self._root)
        root.update({NameObject('/PageMode'): mode})
-    
+
    pageMode = property(getPageMode, setPageMode)
    """Read and write property accessing the :meth:`getPageMode()<PdfFileWriter.getPageMode>`
    and :meth:`setPageMode()<PdfFileWriter.setPageMode>` methods."""
@ -915,8 +930,8 @@ class PdfFileReader(object):
        :raises PdfReadError: if file is encrypted and restrictions prevent
            this action.
        """
-    
-        # Flattened pages will not work on an Encrypted PDF; 
+
+        # Flattened pages will not work on an Encrypted PDF;
        # the PDF file's page count is used in this case. Otherwise,
        # the original method (flattened page count) is used.
        if self.isEncrypted:
@ -971,7 +986,7 @@ class PdfFileReader(object):
        if retval == None:
            retval = {}
            catalog = self.trailer["/Root"]
-            
+
            # get the name tree
            if "/Dests" in catalog:
                tree = catalog["/Dests"]
@ -979,7 +994,7 @@ class PdfFileReader(object):
                names = catalog['/Names']
                if "/Dests" in names:
                    tree = names['/Dests']
-        
+
        if tree == None:
            return retval

@ -1016,17 +1031,17 @@ class PdfFileReader(object):
        if outlines == None:
            outlines = []
            catalog = self.trailer["/Root"]
-            
+
            # get the outline dictionary and named destinations
            if "/Outlines" in catalog:
                lines = catalog["/Outlines"]
                if "/First" in lines:
                    node = lines["/First"]
            self._namedDests = self.getNamedDestinations()
-            
+
        if node == None:
          return outlines
-          
+
        # see if there are any more outlines
        while True:
            outline = self._buildOutline(node)
@ -1050,10 +1065,10 @@ class PdfFileReader(object):
        page, typ = array[0:2]
        array = array[2:]
        return Destination(title, page, typ, *array)
-          
+
    def _buildOutline(self, node):
        dest, title, outline = None, None, None
-        
+
        if "/A" in node and "/Title" in node:
            # Action, section 8.5 (only type GoTo supported)
            title  = node["/Title"]
@ -1097,7 +1112,7 @@ class PdfFileReader(object):
            return self.trailer['/Root']['/PageLayout']
        except KeyError:
            return None
-    
+
    pageLayout = property(getPageLayout)
    """Read-only property accessing the
    :meth:`getPageLayout()<PdfFileReader.getPageLayout>` method."""
@ -1107,7 +1122,7 @@ class PdfFileReader(object):
        Get the page mode.
        See :meth:`setPageMode()<PdfFileWriter.setPageMode>`
        for a description of valid modes.
-        
+
        :return: Page mode currently being used.
        :rtype: ``str``, ``None`` if not specified
        """
@ -1197,20 +1212,20 @@ class PdfFileReader(object):
                warnings.warn("Invalid stream (index %d) within object %d %d: %s" % \
                      (i, indirectReference.idnum, indirectReference.generation, e), utils.PdfReadWarning)

-                if self.strict: 
+                if self.strict:
                    raise utils.PdfReadError("Can't read object stream: %s"%e)
                # Replace with null. Hopefully it's nothing important.
                obj = NullObject()
            return obj
-        
+
        if self.strict: raise utils.PdfReadError("This is a fatal error in strict mode.")
        return NullObject()
-        
-        
+
+
    def getObject(self, indirectReference):
        debug = False
        if debug: print(("looking at:", indirectReference.idnum, indirectReference.generation))
-        retval = self.cacheGetIndirectObject(indirectReference.generation, 
+        retval = self.cacheGetIndirectObject(indirectReference.generation,
                                                indirectReference.idnum)
        if retval != None:
            return retval
@ -1225,11 +1240,11 @@ class PdfFileReader(object):
            idnum, generation = self.readObjectHeader(self.stream)
            if idnum != indirectReference.idnum and self.xrefIndex:
                # Xref table probably had bad indexes due to not being zero-indexed
-                if self.strict: 
+                if self.strict:
                    raise utils.PdfReadError("Expected object ID (%d %d) does not match actual (%d %d); xref table not zero-indexed." \
                                     % (indirectReference.idnum, indirectReference.generation, idnum, generation))
                else: pass # xref table is corrected in non-strict mode
-            elif idnum != indirectReference.idnum: 
+            elif idnum != indirectReference.idnum:
                # some other problem
                raise utils.PdfReadError("Expected object ID (%d %d) does not match actual (%d %d)." \
                                         % (indirectReference.idnum, indirectReference.generation, idnum, generation))
@ -1253,9 +1268,9 @@ class PdfFileReader(object):
        else:
            warnings.warn("Object %d %d not defined."%(indirectReference.idnum,
                        indirectReference.generation), utils.PdfReadWarning)
-            #if self.strict: 
+            #if self.strict:
            raise utils.PdfReadError("Could not find object.")
-        self.cacheIndirectObject(indirectReference.generation, 
+        self.cacheIndirectObject(indirectReference.generation,
                    indirectReference.idnum, retval)
        return retval

@ -1286,7 +1301,7 @@ class PdfFileReader(object):
        obj = stream.read(3)
        readNonWhitespace(stream)
        stream.seek(-1, 1)
-        if (extra and self.strict): 
+        if (extra and self.strict):
            #not a fatal error
            warnings.warn("Superfluous whitespace found in object header %s %s" % \
                          (idnum, generation), utils.PdfReadWarning)
@ -1298,7 +1313,7 @@ class PdfFileReader(object):
        if debug and out: print(("cache hit: %d %d"%(idnum, generation)))
        elif debug: print(("cache miss: %d %d"%(idnum, generation)))
        return out
-    
+
    def cacheIndirectObject(self, generation, idnum, obj):
        # return None # Sometimes we want to turn off cache for debugging.
        if (generation, idnum) in self.resolvedObjects:
@ -1371,17 +1386,17 @@ class PdfFileReader(object):
                    cnt = 0
                    while cnt < size:
                        line = stream.read(20)
-                        
+
                        # It's very clear in section 3.4.3 of the PDF spec
                        # that all cross-reference table lines are a fixed
                        # 20 bytes (as of PDF 1.7). However, some files have
                        # 21-byte entries (or more) due to the use of \r\n
-                        # (CRLF) EOL's. Detect that case, and adjust the line 
+                        # (CRLF) EOL's. Detect that case, and adjust the line
                        # until it does not begin with a \r (CR) or \n (LF).
                        while line[0] in b_("\x0D\x0A"):
                            stream.seek(-20 + 1, 1)
                            line = stream.read(20)
-                        
+
                        # On the other hand, some malformed PDF files
                        # use a single character EOL without a preceding
                        # space.  Detect that case, and seek the stream
@ -1390,7 +1405,7 @@ class PdfFileReader(object):
                        # text "trailer"):
                        if line[-1] in b_("0123456789t"):
                            stream.seek(-1, 1)
-                            
+
                        offset, generation = line[:16].split(b_(" "))
                        offset, generation = int(offset), int(generation)
                        if generation not in self.xref:
@ -1431,7 +1446,7 @@ class PdfFileReader(object):
                assert xrefstream["/Type"] == "/XRef"
                self.cacheIndirectObject(generation, idnum, xrefstream)
                streamData = BytesIO(b_(xrefstream.getData()))
-                # Index pairs specify the subsections in the dictionary. If 
+                # Index pairs specify the subsections in the dictionary. If
                # none create one subsection that spans everything.
                idx_pairs = xrefstream.get("/Index", [0, xrefstream.get("/Size")])
                if debug: print(("read idx_pairs=%s"%list(self._pairs(idx_pairs))))
@ -1445,17 +1460,17 @@ class PdfFileReader(object):
                    if entrySizes[i] > 0:
                        d = streamData.read(entrySizes[i])
                        return convertToInt(d, entrySizes[i])
-                    
-                    # PDF Spec Table 17: A value of zero for an element in the 
+
+                    # PDF Spec Table 17: A value of zero for an element in the
                    # W array indicates...the default value shall be used
                    if i == 0:  return 1 # First value defaults to 1
                    else:       return 0
-                
+
                def used_before(num, generation):
                    # We move backwards through the xrefs, don't replace any.
                    return num in self.xref.get(generation, []) or \
                            num in self.xref_objStm
-                    
+
                # Iterate through each subsection
                last_end = 0
                for start, size in self._pairs(idx_pairs):
@ -1492,7 +1507,7 @@ class PdfFileReader(object):
                        elif self.strict:
                            raise utils.PdfReadError("Unknown xref type: %s"%
                                                        xref_type)
-                            
+
                trailerKeys = "/Root", "/Encrypt", "/Info", "/ID"
                for key in trailerKeys:
                    if key in xrefstream and key not in self.trailer:
@ -1542,10 +1557,10 @@ class PdfFileReader(object):
                    #if not, then either it's just plain wrong, or the non-zero-index is actually correct
            stream.seek(loc, 0) #return to where it was

-    
+
    def _zeroXref(self, generation):
        self.xref[generation] = dict( (k-self.xrefIndex, v) for (k, v) in list(self.xref[generation].items()) )
-            
+
    def _pairs(self, array):
        i = 0
        while True:
@ -1810,7 +1825,7 @@ class PageObject(DictionaryObject):

    def _pushPopGS(contents, pdf):
        # adds a graphics state "push" and "pop" to the beginning and end
-        # of a content stream.  This isolates it from changes such as 
+        # of a content stream.  This isolates it from changes such as
        # transformation matrices.
        stream = ContentStream(contents, pdf)
        stream.operations.insert(0, [[], "q"])
@ -1892,12 +1907,12 @@ class PageObject(DictionaryObject):
                page2Content, rename, self.pdf)
            page2Content = PageObject._pushPopGS(page2Content, self.pdf)
            newContentArray.append(page2Content)
-        
+
        # if expanding the page to fit a new page, calculate the new media box size
        if expand:
-            corners1 = [self.mediaBox.getLowerLeft_x().as_numeric(), self.mediaBox.getLowerLeft_y().as_numeric(), 
+            corners1 = [self.mediaBox.getLowerLeft_x().as_numeric(), self.mediaBox.getLowerLeft_y().as_numeric(),
                        self.mediaBox.getUpperRight_x().as_numeric(), self.mediaBox.getUpperRight_y().as_numeric()]
-            corners2 = [page2.mediaBox.getLowerLeft_x().as_numeric(), page2.mediaBox.getLowerLeft_y().as_numeric(), 
+            corners2 = [page2.mediaBox.getLowerLeft_x().as_numeric(), page2.mediaBox.getLowerLeft_y().as_numeric(),
                        page2.mediaBox.getUpperLeft_x().as_numeric(), page2.mediaBox.getUpperLeft_y().as_numeric(),
                        page2.mediaBox.getUpperRight_x().as_numeric(), page2.mediaBox.getUpperRight_y().as_numeric(),
                        page2.mediaBox.getLowerRight_x().as_numeric(), page2.mediaBox.getLowerRight_y().as_numeric()]
@ -2554,24 +2569,24 @@ def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encr
    # described in Algorithm 3.2.
    key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry)
    # 2. Initialize the MD5 hash function and pass the 32-byte padding string
-    # shown in step 1 of Algorithm 3.2 as input to this function. 
+    # shown in step 1 of Algorithm 3.2 as input to this function.
    m = md5()
    m.update(_encryption_padding)
    # 3. Pass the first element of the file's file identifier array (the value
    # of the ID entry in the document's trailer dictionary; see Table 3.13 on
    # page 73) to the hash function and finish the hash.  (See implementation
-    # note 25 in Appendix H.) 
+    # note 25 in Appendix H.)
    m.update(id1_entry.original_bytes)
    md5_hash = m.digest()
    # 4. Encrypt the 16-byte result of the hash, using an RC4 encryption
-    # function with the encryption key from step 1. 
+    # function with the encryption key from step 1.
    val = utils.RC4_encrypt(key, md5_hash)
    # 5. Do the following 19 times: Take the output from the previous
    # invocation of the RC4 function and pass it as input to a new invocation
    # of the function; use an encryption key generated by taking each byte of
    # the original encryption key (obtained in step 2) and performing an XOR
    # operation between that byte and the single-byte value of the iteration
-    # counter (from 1 to 19). 
+    # counter (from 1 to 19).
    for i in range(1, 20):
        new_key = b_('')
        for l in range(len(key)):
@ -2579,7 +2594,7 @@ def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encr
        val = utils.RC4_encrypt(new_key, val)
    # 6. Append 16 bytes of arbitrary padding to the output from the final
    # invocation of the RC4 function and store the 32-byte result as the value
-    # of the U entry in the encryption dictionary. 
+    # of the U entry in the encryption dictionary.
    # (implementer's note: I don't know what "arbitrary padding" is supposed to
    # mean, so I have used null bytes.  This seems to match a few other
    # people's implementations)
--- a/Sample_Code/basic_features.py
+++ b/Sample_Code/basic_features.py
@ -2,7 +2,7 @@ from PyPDF2 import PdfFileWriter, PdfFileReader

 output = PdfFileWriter()
 input1 = PdfFileReader(open("document1.pdf", "rb"))
-    
+
 # print how many pages input1 has:
 print "document1.pdf has %d pages." % input1.getNumPages()

@ -21,7 +21,7 @@ page4 = input1.getPage(3)
 watermark = PdfFileReader(open("watermark.pdf", "rb"))
 page4.mergePage(watermark.getPage(0))
 output.addPage(page4)
-    
+

 # add page 5 from input1, but crop it to half size:
 page5 = input1.getPage(4)
@ -31,6 +31,11 @@ page5.mediaBox.upperRight = (
 )
 output.addPage(page5)

+# add some JavaScript to launch the print window on opening this PDF.
+# the password dialog may prevent the print dialog from being shown;
+# if that's the case, comment out the encryption lines to try this out
+output.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")
+
 # encrypt your new PDF and add a password
 password = "secret"
 output.encrypt(password)