From 6d94e5bf56487279ade7219fe6b1fc10dd080473 Mon Sep 17 00:00:00 2001
From: switham <github@mac-guyver.com>
Date: Thu, 9 Jan 2014 14:51:55 -0500
Subject: [PATCH] pdfcat and pagerange.py: error handling, stdout, misc.

modified:   LICENSE
    Added Steve Witham <switham_github@mac-guyver.com>

modified:   PyPDF2/pagerange.py
    Fixed some error handling.
    Made parse_filename_page_ranges return a list instead of being a generator.
    Tweaked a variable name for readability.

renamed:    Sample_Code/makepages.py -> Sample_Code/makesimple.py
also modified:
    Renamed the code to be similar to makesimple.sh.
    Changed the output file names to be the same as makesimple.sh makes.

modified:   Sample_Code/pdfcat
    Fixed to allow sending output to a file by redirection like with cat,
         pdfcat input*.pdf >output.pdf
    using
         output = os.fdopen(stdout.fileno(), "wb")
    I have not tested this on Windows, but the Python docs say the calls are
    supported there.  The worst problem this could cause would be to
    silence PyPDF2's existing warning about the file being in text mode,
    while still actually writing in Windows text mode.
    Added examples to the --help output to show output-redirect style.

    Took out the caching of open input files.  Needless complication.

    Added a try...except so that if there's a problem while processing an
    input file, the traceback and the filename are printed and pdfcat exits
    with status 1.

    Made quiet the default and --verbose an option.
---
 LICENSE                                     |  1 +
 PyPDF2/pagerange.py                         | 26 ++++++-----
 Sample_Code/{makepages.py => makesimple.py} |  4 +-
 Sample_Code/pdfcat                          | 51 ++++++++++++---------
 4 files changed, 47 insertions(+), 35 deletions(-)
 rename Sample_Code/{makepages.py => makesimple.py} (90%)

diff --git a/LICENSE b/LICENSE
index e058995..ab327d0 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,5 +1,6 @@
 Copyright (c) 2006-2008, Mathieu Fenniak
 Some contributions copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
+Some contributions copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>
 
 All rights reserved.
 
diff --git a/PyPDF2/pagerange.py b/PyPDF2/pagerange.py
index 1e5554c..0639175 100644
--- a/PyPDF2/pagerange.py
+++ b/PyPDF2/pagerange.py
@@ -122,27 +122,29 @@ PAGE_RANGE_ALL = PageRange(":")  # The range of all pages.
 
 
 def parse_filename_page_ranges(args):
-    """ 
-    Generate a sequence of (filename, page_range) pairs from a list
-    of filenames and page ranges.
+    """
+    Given a list of filenames and page ranges, return a list of
+    (filename, page_range) pairs.
     First arg must be a filename; other ags are filenames, page-range 
     expressions, slice objects, or PageRange objects.
-    A filename not followed by page range indicates all pages of the file.
-    Yields tuples like (pdf_filename, page_range).
+    A filename not followed by a page range indicates all pages of the file.
     """
+    pairs = []
     pdf_filename = None
-    did_some = False
+    did_page_range = False
     for arg in args + [None]:
         if PageRange.valid(arg):
             if not pdf_filename:
-                raise Error("First argument should be a filename.")
+                raise ValueError("The first argument must be a filename, " \
+                                 "not a page range.")
 
-            yield (pdf_filename, PageRange(arg))
-            did_some = True
+            pairs.append( (pdf_filename, PageRange(arg)) )
+            did_page_range = True
         else:
             # New filename or end of list--do all of the previous file?
-            if pdf_filename and not did_some:
-                yield (pdf_filename, PAGE_RANGE_ALL)
+            if pdf_filename and not did_page_range:
+                pairs.append( (pdf_filename, PAGE_RANGE_ALL) )
                     
             pdf_filename = arg
-            did_some = False
+            did_page_range = False
+    return pairs
diff --git a/Sample_Code/makepages.py b/Sample_Code/makesimple.py
similarity index 90%
rename from Sample_Code/makepages.py
rename to Sample_Code/makesimple.py
index ab20088..6594b97 100755
--- a/Sample_Code/makepages.py
+++ b/Sample_Code/makesimple.py
@@ -10,7 +10,7 @@ inch = 72
 TEXT = """%s    page %d of %d
 
 a wonderful file
-created with Sample_Code/makepages.py"""
+created with Sample_Code/makesimple.py"""
 
 def make_pdf_file(output_filename, np):
     title = output_filename
@@ -30,6 +30,6 @@ if __name__ == "__main__":
     nps = [None, 5, 11, 17]
     for i, np in enumerate(nps):
         if np:
-            filename = "simplest_%d.pdf" % i
+            filename = "simple%d.pdf" % i
             make_pdf_file(filename, np)
             print "Wrote", filename
diff --git a/Sample_Code/pdfcat b/Sample_Code/pdfcat
index c694ec1..4f19d69 100755
--- a/Sample_Code/pdfcat
+++ b/Sample_Code/pdfcat
@@ -7,10 +7,16 @@ A file not followed by a page range means all the pages of the file.
 
 PAGE RANGES are like Python slices.
         {page_range_help}
-EXAMPLE
+EXAMPLES
     pdfcat -o output.pdf head.pdf content.pdf :6 7: tail.pdf -1
-    Concatenate all of head.pdf, all but page seven of content.pdf, 
-    and the last page of tail.pdf, producing output.pdf.
+        Concatenate all of head.pdf, all but page seven of content.pdf, 
+        and the last page of tail.pdf, producing output.pdf.
+
+    pdfcat chapter*.pdf >book.pdf
+        You can specify the output file by redirection.
+
+    pdfcat chapter?.pdf chapter10.pdf >book.pdf
+        In case you don't want chapter 10 before chapter 2.
 """
 # Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.
 # All rights reserved. This software is available under a BSD license;
@@ -25,33 +31,36 @@ def parse_args():
         formatter_class=argparse.RawDescriptionHelpFormatter)
     parser.add_argument("args", nargs="+",
                         metavar="filename or page range expression")
-    parser.add_argument("-o", "--output", required=True,
+    parser.add_argument("-o", "--output",
                         metavar="output_file")
-    parser.add_argument("-q", "--quiet", action="store_true")
+    parser.add_argument("-v", "--verbose", action="store_true",
+                        help="show page ranges as they are being read")
     return parser.parse_args()
 
-from sys import argv, stderr, stdout, exit
-import re
+from sys import stderr, stdout, exit
+import os
 import traceback
 from PyPDF2 import PdfFileMerger, parse_filename_page_ranges
 
 
 if __name__ == "__main__":
     args = parse_args()
-    verbose = not(args.quiet)
-
     filename_page_ranges = parse_filename_page_ranges(args.args)
+
     merger = PdfFileMerger()
-    input_files = {}
-    for (filename, page_range) in filename_page_ranges:
-        if verbose:
-            print >>stderr, filename, page_range
-        if filename not in input_files:
-            input_files[filename] = open(filename, "rb")
-        merger.append(input_files[filename], pages=page_range)
-    for f in input_files.values():
-        f.close()
-    output = open(args.output, "wb")
+    try:
+        for (filename, page_range) in filename_page_ranges:
+            if args.verbose:
+                print >>stderr, filename, page_range
+            with open(filename, "rb") as f:
+                merger.append(f, pages=page_range)
+    except:
+        print >>stderr, traceback.format_exc()
+        print >>stderr, "Error while reading " + filename
+        exit(1)
+    if args.output:
+        output = open(args.output, "wb")
+    else:
+        stdout.flush()
+        output = os.fdopen(stdout.fileno(), "wb")
     merger.write(output)
-
-