pdfcat and pagerange.py: error handling, stdout, misc.

modified:   LICENSE
    Added Steve Witham <switham_github@mac-guyver.com>

modified:   PyPDF2/pagerange.py
    Fixed some error handling.
    Made parse_filename_page_ranges return a list instead of being a generator.
    Tweaked a variable name for readability.

renamed:    Sample_Code/makepages.py -> Sample_Code/makesimple.py
also modified:
    Renamed the code to be similar to makesimple.sh.
    Changed the output file names to be the same as makesimple.sh makes.

modified:   Sample_Code/pdfcat
    Fixed to allow sending output to a file by redirection like with cat,
         pdfcat input*.pdf >output.pdf
    using
         output = os.fdopen(stdout.fileno(), "wb")
    I have not tested this in Windows, but the Python docs say the calls are
    supported in Windows.  The worst problem this could cause would be to
    silence PyPDF2's existing warning about the file being in text mode,
    while still actually writing in Windows text mode.
    Added examples to the --help output to show output-redirect style.

    Took out the caching of open input files.  Needless complication.

    Added a try...except so that if there's a problem while processing an
    input file, the filename is printed.

    Made quiet the default and --verbose an option.
This commit is contained in:
switham 2014-01-09 14:51:55 -05:00
parent 201ad9f364
commit 6d94e5bf56
4 changed files with 47 additions and 35 deletions

View File

@ -1,5 +1,6 @@
Copyright (c) 2006-2008, Mathieu Fenniak
Some contributions copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
Some contributions copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>
All rights reserved.

View File

@ -122,27 +122,29 @@ PAGE_RANGE_ALL = PageRange(":") # The range of all pages.
def parse_filename_page_ranges(args):
"""
Generate a sequence of (filename, page_range) pairs from a list
of filenames and page ranges.
"""
Given a list of filenames and page ranges, return a list of
(filename, page_range) pairs.
First arg must be a filename; other args are filenames, page-range
expressions, slice objects, or PageRange objects.
A filename not followed by page range indicates all pages of the file.
Yields tuples like (pdf_filename, page_range).
A filename not followed by a page range indicates all pages of the file.
"""
pairs = []
pdf_filename = None
did_some = False
did_page_range = False
for arg in args + [None]:
if PageRange.valid(arg):
if not pdf_filename:
raise Error("First argument should be a filename.")
raise ValueError("The first argument must be a filename, " \
"not a page range.")
yield (pdf_filename, PageRange(arg))
did_some = True
pairs.append( (pdf_filename, PageRange(arg)) )
did_page_range = True
else:
# New filename or end of list--do all of the previous file?
if pdf_filename and not did_some:
yield (pdf_filename, PAGE_RANGE_ALL)
if pdf_filename and not did_page_range:
pairs.append( (pdf_filename, PAGE_RANGE_ALL) )
pdf_filename = arg
did_some = False
did_page_range = False
return pairs

View File

@ -10,7 +10,7 @@ inch = 72
TEXT = """%s page %d of %d
a wonderful file
created with Sample_Code/makepages.py"""
created with Sample_Code/makesimple.py"""
def make_pdf_file(output_filename, np):
title = output_filename
@ -30,6 +30,6 @@ if __name__ == "__main__":
nps = [None, 5, 11, 17]
for i, np in enumerate(nps):
if np:
filename = "simplest_%d.pdf" % i
filename = "simple%d.pdf" % i
make_pdf_file(filename, np)
print "Wrote", filename

View File

@ -7,10 +7,16 @@ A file not followed by a page range means all the pages of the file.
PAGE RANGES are like Python slices.
{page_range_help}
EXAMPLE
EXAMPLES
pdfcat -o output.pdf head.pdf content.pdf :6 7: tail.pdf -1
Concatenate all of head.pdf, all but page seven of content.pdf,
and the last page of tail.pdf, producing output.pdf.
Concatenate all of head.pdf, all but page seven of content.pdf,
and the last page of tail.pdf, producing output.pdf.
pdfcat chapter*.pdf >book.pdf
You can specify the output file by redirection.
pdfcat chapter?.pdf chapter10.pdf >book.pdf
In case you don't want chapter 10 before chapter 2.
"""
# Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.
# All rights reserved. This software is available under a BSD license;
@ -25,33 +31,36 @@ def parse_args():
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument("args", nargs="+",
metavar="filename or page range expression")
parser.add_argument("-o", "--output", required=True,
parser.add_argument("-o", "--output",
metavar="output_file")
parser.add_argument("-q", "--quiet", action="store_true")
parser.add_argument("-v", "--verbose", action="store_true",
help="show page ranges as they are being read")
return parser.parse_args()
from sys import argv, stderr, stdout, exit
import re
from sys import stderr, stdout, exit
import os
import traceback
from PyPDF2 import PdfFileMerger, parse_filename_page_ranges
if __name__ == "__main__":
args = parse_args()
verbose = not(args.quiet)
filename_page_ranges = parse_filename_page_ranges(args.args)
merger = PdfFileMerger()
input_files = {}
for (filename, page_range) in filename_page_ranges:
if verbose:
print >>stderr, filename, page_range
if filename not in input_files:
input_files[filename] = open(filename, "rb")
merger.append(input_files[filename], pages=page_range)
for f in input_files.values():
f.close()
output = open(args.output, "wb")
try:
for (filename, page_range) in filename_page_ranges:
if args.verbose:
print >>stderr, filename, page_range
with open(filename, "rb") as f:
merger.append(f, pages=page_range)
except:
print >>stderr, traceback.format_exc()
print >>stderr, "Error while reading " + filename
exit(1)
if args.output:
output = open(args.output, "wb")
else:
stdout.flush()
output = os.fdopen(stdout.fileno(), "wb")
merger.write(output)