pdfcat and pagerange.py: error handling, stdout, misc.

modified: LICENSE -- Added Steve Witham <switham_github@mac-guyver.com>. modified: PyPDF2/pagerange.py -- Fixed some error handling. Made parse_filename_page_ranges return a list instead of being a generator. Tweaked a variable name for readability. renamed: Sample_Code/makepages.py -> Sample_Code/makesimple.py (also modified) -- Renamed the script to be similar to makesimple.sh. Changed the output file names to be the same as the ones makesimple.sh makes. modified: Sample_Code/pdfcat -- Fixed to allow sending output to a file by redirection, as with cat (pdfcat input*.pdf >output.pdf), using output = os.fdopen(stdout.fileno(), "wb"). I have not tested this on Windows, but the Python docs say the calls are supported on Windows. The worst problem this could cause would be to silence PyPDF2's existing warning about the file being in text mode, while still actually writing in Windows text mode. Added examples to the --help output to show output-redirect style. Took out the caching of open input files, which was a needless complication. Added a try...except so that if there's a problem while processing an input file, the filename is printed. Made quiet the default and --verbose an option.
2014-01-09 14:51:55 -05:00 · 2014-01-09 14:51:55 -05:00 · 6d94e5bf56
parent 201ad9f364
commit 6d94e5bf56
4 changed files with 47 additions and 35 deletions
--- a/1
+++ b/1
@ -1,5 +1,6 @@
 Copyright (c) 2006-2008, Mathieu Fenniak
 Some contributions copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
+Some contributions copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>

 All rights reserved.

--- a/PyPDF2/pagerange.py
+++ b/PyPDF2/pagerange.py
@ -122,27 +122,29 @@ PAGE_RANGE_ALL = PageRange(":")  # The range of all pages.


 def parse_filename_page_ranges(args):
-    """ 
-    Generate a sequence of (filename, page_range) pairs from a list
-    of filenames and page ranges.
+    """
+    Given a list of filenames and page ranges, return a list of
+    (filename, page_range) pairs.
    First arg must be a filename; other ags are filenames, page-range 
    expressions, slice objects, or PageRange objects.
-    A filename not followed by page range indicates all pages of the file.
-    Yields tuples like (pdf_filename, page_range).
+    A filename not followed by a page range indicates all pages of the file.
    """
+    pairs = []
    pdf_filename = None
-    did_some = False
+    did_page_range = False
    for arg in args + [None]:
        if PageRange.valid(arg):
            if not pdf_filename:
-                raise Error("First argument should be a filename.")
+                raise ValueError("The first argument must be a filename, " \
+                                 "not a page range.")

-            yield (pdf_filename, PageRange(arg))
-            did_some = True
+            pairs.append( (pdf_filename, PageRange(arg)) )
+            did_page_range = True
        else:
            # New filename or end of list--do all of the previous file?
-            if pdf_filename and not did_some:
-                yield (pdf_filename, PAGE_RANGE_ALL)
+            if pdf_filename and not did_page_range:
+                pairs.append( (pdf_filename, PAGE_RANGE_ALL) )
                    
            pdf_filename = arg
-            did_some = False
+            did_page_range = False
+    return pairs
--- a/Sample_Code/makesimple.py
+++ b/Sample_Code/makesimple.py
@ -10,7 +10,7 @@ inch = 72
 TEXT = """%s    page %d of %d

 a wonderful file
-created with Sample_Code/makepages.py"""
+created with Sample_Code/makesimple.py"""

 def make_pdf_file(output_filename, np):
    title = output_filename
@ -30,6 +30,6 @@ if __name__ == "__main__":
    nps = [None, 5, 11, 17]
    for i, np in enumerate(nps):
        if np:
-            filename = "simplest_%d.pdf" % i
+            filename = "simple%d.pdf" % i
            make_pdf_file(filename, np)
            print "Wrote", filename
--- a/Sample_Code/pdfcat
+++ b/Sample_Code/pdfcat
@ -7,10 +7,16 @@ A file not followed by a page range means all the pages of the file.

 PAGE RANGES are like Python slices.
        {page_range_help}
-EXAMPLE
+EXAMPLES
    pdfcat -o output.pdf head.pdf content.pdf :6 7: tail.pdf -1
-    Concatenate all of head.pdf, all but page seven of content.pdf, 
-    and the last page of tail.pdf, producing output.pdf.
+        Concatenate all of head.pdf, all but page seven of content.pdf, 
+        and the last page of tail.pdf, producing output.pdf.
+
+    pdfcat chapter*.pdf >book.pdf
+        You can specify the output file by redirection.
+
+    pdfcat chapter?.pdf chapter10.pdf >book.pdf
+        In case you don't want chapter 10 before chapter 2.
 """
 # Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.
 # All rights reserved. This software is available under a BSD license;
@ -25,33 +31,36 @@ def parse_args():
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("args", nargs="+",
                        metavar="filename or page range expression")
-    parser.add_argument("-o", "--output", required=True,
+    parser.add_argument("-o", "--output",
                        metavar="output_file")
-    parser.add_argument("-q", "--quiet", action="store_true")
+    parser.add_argument("-v", "--verbose", action="store_true",
+                        help="show page ranges as they are being read")
    return parser.parse_args()

-from sys import argv, stderr, stdout, exit
-import re
+from sys import stderr, stdout, exit
+import os
 import traceback
 from PyPDF2 import PdfFileMerger, parse_filename_page_ranges


 if __name__ == "__main__":
    args = parse_args()
-    verbose = not(args.quiet)
-
    filename_page_ranges = parse_filename_page_ranges(args.args)
+
    merger = PdfFileMerger()
-    input_files = {}
-    for (filename, page_range) in filename_page_ranges:
-        if verbose:
-            print >>stderr, filename, page_range
-        if filename not in input_files:
-            input_files[filename] = open(filename, "rb")
-        merger.append(input_files[filename], pages=page_range)
-    for f in input_files.values():
-        f.close()
-    output = open(args.output, "wb")
+    try:
+        for (filename, page_range) in filename_page_ranges:
+            if args.verbose:
+                print >>stderr, filename, page_range
+            with open(filename, "rb") as f:
+                merger.append(f, pages=page_range)
+    except:
+        print >>stderr, traceback.format_exc()
+        print >>stderr, "Error while reading " + filename
+        exit(1)
+    if args.output:
+        output = open(args.output, "wb")
+    else:
+        stdout.flush()
+        output = os.fdopen(stdout.fileno(), "wb")
    merger.write(output)
-
-