From 6d94e5bf56487279ade7219fe6b1fc10dd080473 Mon Sep 17 00:00:00 2001 From: switham Date: Thu, 9 Jan 2014 14:51:55 -0500 Subject: [PATCH] pdfcat and pagerange.py: error handling, stdout, misc. modified: LICENSE Added Steve Witham modified: PyPDF2/pagerange.py Fixed some error handling. Made parse_filename_page_ranges return a list instead of being a generator. Tweaked a variable name for readability. renamed: Sample_Code/makepages.py -> Sample_Code/makesimple.py also modified: Renamed the code to be similar to makesimple.sh. Changed the output file names to match the ones makesimple.sh makes. modified: Sample_Code/pdfcat Fixed to allow sending output to a file by redirection like with cat, pdfcat input*.pdf >output.pdf using output = os.fdopen(stdout.fileno(), "wb") I have not tested this on Windows, but the Python docs say the calls are supported on Windows. The worst problem this could cause would be to silence PyPDF2's existing warning about the file being in text mode, while still actually writing in Windows text mode. Added examples to the --help output to show output-redirect style. Took out the caching of open input files. Needless complication. Added a try...except so that if there's a problem while processing an input file, the filename is printed. Made quiet the default and --verbose an option. --- LICENSE | 1 + PyPDF2/pagerange.py | 26 ++++++----- Sample_Code/{makepages.py => makesimple.py} | 4 +- Sample_Code/pdfcat | 51 ++++++++++++--------- 4 files changed, 47 insertions(+), 35 deletions(-) rename Sample_Code/{makepages.py => makesimple.py} (90%) diff --git a/LICENSE b/LICENSE index e058995..ab327d0 100644 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,6 @@ Copyright (c) 2006-2008, Mathieu Fenniak Some contributions copyright (c) 2007, Ashish Kulkarni +Some contributions copyright (c) 2014, Steve Witham All rights reserved. 
diff --git a/PyPDF2/pagerange.py b/PyPDF2/pagerange.py index 1e5554c..0639175 100644 --- a/PyPDF2/pagerange.py +++ b/PyPDF2/pagerange.py @@ -122,27 +122,29 @@ PAGE_RANGE_ALL = PageRange(":") # The range of all pages. def parse_filename_page_ranges(args): - """ - Generate a sequence of (filename, page_range) pairs from a list - of filenames and page ranges. + """ + Given a list of filenames and page ranges, return a list of + (filename, page_range) pairs. First arg must be a filename; other ags are filenames, page-range expressions, slice objects, or PageRange objects. - A filename not followed by page range indicates all pages of the file. - Yields tuples like (pdf_filename, page_range). + A filename not followed by a page range indicates all pages of the file. """ + pairs = [] pdf_filename = None - did_some = False + did_page_range = False for arg in args + [None]: if PageRange.valid(arg): if not pdf_filename: - raise Error("First argument should be a filename.") + raise ValueError("The first argument must be a filename, " \ + "not a page range.") - yield (pdf_filename, PageRange(arg)) - did_some = True + pairs.append( (pdf_filename, PageRange(arg)) ) + did_page_range = True else: # New filename or end of list--do all of the previous file? 
- if pdf_filename and not did_some: - yield (pdf_filename, PAGE_RANGE_ALL) + if pdf_filename and not did_page_range: + pairs.append( (pdf_filename, PAGE_RANGE_ALL) ) pdf_filename = arg - did_some = False + did_page_range = False + return pairs diff --git a/Sample_Code/makepages.py b/Sample_Code/makesimple.py similarity index 90% rename from Sample_Code/makepages.py rename to Sample_Code/makesimple.py index ab20088..6594b97 100755 --- a/Sample_Code/makepages.py +++ b/Sample_Code/makesimple.py @@ -10,7 +10,7 @@ inch = 72 TEXT = """%s page %d of %d a wonderful file -created with Sample_Code/makepages.py""" +created with Sample_Code/makesimple.py""" def make_pdf_file(output_filename, np): title = output_filename @@ -30,6 +30,6 @@ if __name__ == "__main__": nps = [None, 5, 11, 17] for i, np in enumerate(nps): if np: - filename = "simplest_%d.pdf" % i + filename = "simple%d.pdf" % i make_pdf_file(filename, np) print "Wrote", filename diff --git a/Sample_Code/pdfcat b/Sample_Code/pdfcat index c694ec1..4f19d69 100755 --- a/Sample_Code/pdfcat +++ b/Sample_Code/pdfcat @@ -7,10 +7,16 @@ A file not followed by a page range means all the pages of the file. PAGE RANGES are like Python slices. {page_range_help} -EXAMPLE +EXAMPLES pdfcat -o output.pdf head.pdf content.pdf :6 7: tail.pdf -1 - Concatenate all of head.pdf, all but page seven of content.pdf, - and the last page of tail.pdf, producing output.pdf. + Concatenate all of head.pdf, all but page seven of content.pdf, + and the last page of tail.pdf, producing output.pdf. + + pdfcat chapter*.pdf >book.pdf + You can specify the output file by redirection. + + pdfcat chapter?.pdf chapter10.pdf >book.pdf + In case you don't want chapter 10 before chapter 2. """ # Copyright (c) 2014, Steve Witham . # All rights reserved. 
This software is available under a BSD license; @@ -25,33 +31,36 @@ def parse_args(): formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument("args", nargs="+", metavar="filename or page range expression") - parser.add_argument("-o", "--output", required=True, + parser.add_argument("-o", "--output", metavar="output_file") - parser.add_argument("-q", "--quiet", action="store_true") + parser.add_argument("-v", "--verbose", action="store_true", + help="show page ranges as they are being read") return parser.parse_args() -from sys import argv, stderr, stdout, exit -import re +from sys import stderr, stdout, exit +import os import traceback from PyPDF2 import PdfFileMerger, parse_filename_page_ranges if __name__ == "__main__": args = parse_args() - verbose = not(args.quiet) - filename_page_ranges = parse_filename_page_ranges(args.args) + merger = PdfFileMerger() - input_files = {} - for (filename, page_range) in filename_page_ranges: - if verbose: - print >>stderr, filename, page_range - if filename not in input_files: - input_files[filename] = open(filename, "rb") - merger.append(input_files[filename], pages=page_range) - for f in input_files.values(): - f.close() - output = open(args.output, "wb") + try: + for (filename, page_range) in filename_page_ranges: + if args.verbose: + print >>stderr, filename, page_range + with open(filename, "rb") as f: + merger.append(f, pages=page_range) + except: + print >>stderr, traceback.format_exc() + print >>stderr, "Error while reading " + filename + exit(1) + if args.output: + output = open(args.output, "wb") + else: + stdout.flush() + output = os.fdopen(stdout.fileno(), "wb") merger.write(output) - -