debian-pypdf4/PyPDF2/generic.py

1048 lines
37 KiB
Python

# vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""
Implementation of generic PDF objects (dictionary, number, string, and so on)
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"
import re
from utils import readNonWhitespace, RC4_encrypt
import filters
import utils
import decimal
import codecs
def readObject(stream, pdf):
tok = stream.read(1)
stream.seek(-1, 1) # reset to start
if tok == 't' or tok == 'f':
# boolean object
return BooleanObject.readFromStream(stream)
elif tok == '(':
# string object
return readStringFromStream(stream)
elif tok == '/':
# name object
return NameObject.readFromStream(stream)
elif tok == '[':
# array object
return ArrayObject.readFromStream(stream, pdf)
elif tok == 'n':
# null object
return NullObject.readFromStream(stream)
elif tok == '<':
# hexadecimal string OR dictionary
peek = stream.read(2)
stream.seek(-2, 1) # reset to start
if peek == '<<':
return DictionaryObject.readFromStream(stream, pdf)
else:
return readHexStringFromStream(stream)
elif tok == '%':
# comment
while tok not in ('\r', '\n'):
tok = stream.read(1)
tok = readNonWhitespace(stream)
stream.seek(-1, 1)
return readObject(stream, pdf)
else:
# number object OR indirect reference
if tok == '+' or tok == '-':
# number
return NumberObject.readFromStream(stream)
peek = stream.read(20)
stream.seek(-len(peek), 1) # reset to start
if re.match(r"(\d+)\s(\d+)\sR[^a-zA-Z]", peek) != None:
return IndirectObject.readFromStream(stream, pdf)
else:
return NumberObject.readFromStream(stream)
class PdfObject(object):
sweep_required = False
def getObject(self):
"""Resolves indirect references."""
return self
class NullObject(PdfObject):
def writeToStream(self, stream, encryption_key):
stream.write("null")
def readFromStream(stream):
nulltxt = stream.read(4)
if nulltxt != "null":
raise utils.PdfReadError, "error reading null object"
return NullObject()
readFromStream = staticmethod(readFromStream)
class BooleanObject(PdfObject):
def __init__(self, value):
self.value = value
def writeToStream(self, stream, encryption_key):
if self.value:
stream.write("true")
else:
stream.write("false")
def readFromStream(stream):
word = stream.read(4)
if word == "true":
return BooleanObject(True)
elif word == "fals":
stream.read(1)
return BooleanObject(False)
assert False
readFromStream = staticmethod(readFromStream)
class ArrayObject(list, PdfObject):
sweep_required = True
def writeToStream(self, stream, encryption_key):
stream.write("[")
for data in self:
stream.write(" ")
data.writeToStream(stream, encryption_key)
stream.write(" ]")
def readFromStream(stream, pdf):
arr = ArrayObject()
tmp = stream.read(1)
if tmp != "[":
raise utils.PdfReadError, "error reading array"
while True:
# skip leading whitespace
tok = stream.read(1)
while tok.isspace():
tok = stream.read(1)
stream.seek(-1, 1)
# check for array ending
peekahead = stream.read(1)
if peekahead == "]":
break
stream.seek(-1, 1)
# read and append obj
arr.append(readObject(stream, pdf))
return arr
readFromStream = staticmethod(readFromStream)
class IndirectObject(PdfObject):
sweep_required = True
def __init__(self, idnum, generation, pdf):
self.idnum = idnum
self.generation = generation
self.pdf = pdf
def getObject(self):
return self.pdf.getObject(self).getObject()
def __repr__(self):
return "IndirectObject(%r, %r)" % (self.idnum, self.generation)
def __eq__(self, other):
return (
other != None and
isinstance(other, IndirectObject) and
self.idnum == other.idnum and
self.generation == other.generation and
self.pdf is other.pdf
)
def __ne__(self, other):
return not self.__eq__(other)
def writeToStream(self, stream, encryption_key):
stream.write("%s %s R" % (self.idnum, self.generation))
def readFromStream(stream, pdf):
idnum = ""
while True:
tok = stream.read(1)
if tok.isspace():
break
idnum += tok
generation = ""
while True:
tok = stream.read(1)
if tok.isspace():
break
generation += tok
r = stream.read(1)
if r != "R":
raise utils.PdfReadError("error reading indirect object reference")
return IndirectObject(int(idnum), int(generation), pdf)
readFromStream = staticmethod(readFromStream)
class FloatObject(decimal.Decimal, PdfObject):
def __new__(cls, value="0", context=None):
return decimal.Decimal.__new__(cls, str(value), context)
def __repr__(self):
if self == self.to_integral():
return str(self.quantize(decimal.Decimal(1)))
else:
# XXX: this adds useless extraneous zeros.
return "%.5f" % self
def writeToStream(self, stream, encryption_key):
stream.write(repr(self))
class NumberObject(int, PdfObject):
def __init__(self, value):
int.__init__(value)
def writeToStream(self, stream, encryption_key):
stream.write(repr(self))
def readFromStream(stream):
name = ""
while True:
tok = stream.read(1)
if tok != '+' and tok != '-' and tok != '.' and not tok.isdigit():
stream.seek(-1, 1)
break
name += tok
if name.find(".") != -1:
return FloatObject(name)
else:
return NumberObject(name)
readFromStream = staticmethod(readFromStream)
##
# Given a string (either a "str" or "unicode"), create a ByteStringObject or a
# TextStringObject to represent the string.
def createStringObject(string):
if isinstance(string, unicode):
return TextStringObject(string)
elif isinstance(string, str):
if string.startswith(codecs.BOM_UTF16_BE):
retval = TextStringObject(string.decode("utf-16"))
retval.autodetect_utf16 = True
return retval
else:
# This is probably a big performance hit here, but we need to
# convert string objects into the text/unicode-aware version if
# possible... and the only way to check if that's possible is
# to try. Some strings are strings, some are just byte arrays.
try:
retval = TextStringObject(decode_pdfdocencoding(string))
retval.autodetect_pdfdocencoding = True
return retval
except UnicodeDecodeError:
return ByteStringObject(string)
else:
raise TypeError("createStringObject should have str or unicode arg")
def readHexStringFromStream(stream):
stream.read(1)
txt = ""
x = ""
while True:
tok = readNonWhitespace(stream)
if tok == ">":
break
x += tok
if len(x) == 2:
txt += chr(int(x, base=16))
x = ""
if len(x) == 1:
x += "0"
if len(x) == 2:
txt += chr(int(x, base=16))
return createStringObject(txt)
def readStringFromStream(stream):
tok = stream.read(1)
parens = 1
txt = ""
while True:
tok = stream.read(1)
if tok == "(":
parens += 1
elif tok == ")":
parens -= 1
if parens == 0:
break
elif tok == "\\":
tok = stream.read(1)
if tok == "n":
tok = "\n"
elif tok == "r":
tok = "\r"
elif tok == "t":
tok = "\t"
elif tok == "b":
tok = "\b"
elif tok == "f":
tok = "\f"
elif tok == "(":
tok = "("
elif tok == ")":
tok = ")"
elif tok == "\\":
tok = "\\"
elif tok.isdigit():
# "The number ddd may consist of one, two, or three
# octal digits; high-order overflow shall be ignored.
# Three octal digits shall be used, with leading zeros
# as needed, if the next character of the string is also
# a digit." (PDF reference 7.3.4.2, p 16)
for i in range(2):
ntok = stream.read(1)
if ntok.isdigit():
tok += ntok
else:
break
tok = chr(int(tok, base=8))
elif tok in "\n\r":
# This case is hit when a backslash followed by a line
# break occurs. If it's a multi-char EOL, consume the
# second character:
tok = stream.read(1)
if not tok in "\n\r":
stream.seek(-1, 1)
# Then don't add anything to the actual string, since this
# line break was escaped:
tok = ''
else:
raise utils.PdfReadError("Unexpected escaped string")
txt += tok
return createStringObject(txt)
##
# Represents a string object where the text encoding could not be determined.
# This occurs quite often, as the PDF spec doesn't provide an alternate way to
# represent strings -- for example, the encryption data stored in files (like
# /O) is clearly not text, but is still stored in a "String" object.
class ByteStringObject(str, PdfObject):
##
# For compatibility with TextStringObject.original_bytes. This method
# returns self.
original_bytes = property(lambda self: self)
def writeToStream(self, stream, encryption_key):
bytearr = self
if encryption_key:
bytearr = RC4_encrypt(encryption_key, bytearr)
stream.write("<")
stream.write(bytearr.encode("hex"))
stream.write(">")
##
# Represents a string object that has been decoded into a real unicode string.
# If read from a PDF document, this string appeared to match the
# PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding to
# occur.
class TextStringObject(unicode, PdfObject):
autodetect_pdfdocencoding = False
autodetect_utf16 = False
##
# It is occasionally possible that a text string object gets created where
# a byte string object was expected due to the autodetection mechanism --
# if that occurs, this "original_bytes" property can be used to
# back-calculate what the original encoded bytes were.
original_bytes = property(lambda self: self.get_original_bytes())
def get_original_bytes(self):
# We're a text string object, but the library is trying to get our raw
# bytes. This can happen if we auto-detected this string as text, but
# we were wrong. It's pretty common. Return the original bytes that
# would have been used to create this object, based upon the autodetect
# method.
if self.autodetect_utf16:
return codecs.BOM_UTF16_BE + self.encode("utf-16be")
elif self.autodetect_pdfdocencoding:
return encode_pdfdocencoding(self)
else:
raise Exception("no information about original bytes")
def writeToStream(self, stream, encryption_key):
# Try to write the string out as a PDFDocEncoding encoded string. It's
# nicer to look at in the PDF file. Sadly, we take a performance hit
# here for trying...
try:
bytearr = encode_pdfdocencoding(self)
except UnicodeEncodeError:
bytearr = codecs.BOM_UTF16_BE + self.encode("utf-16be")
if encryption_key:
bytearr = RC4_encrypt(encryption_key, bytearr)
obj = ByteStringObject(bytearr)
obj.writeToStream(stream, None)
else:
stream.write("(")
for c in bytearr:
if not c.isalnum() and c != ' ':
stream.write("\\%03o" % ord(c))
else:
stream.write(c)
stream.write(")")
class NameObject(str, PdfObject):
delimiterCharacters = "(", ")", "<", ">", "[", "]", "{", "}", "/", "%"
def __init__(self, data):
str.__init__(data)
def writeToStream(self, stream, encryption_key):
stream.write(self)
def readFromStream(stream):
name = stream.read(1)
if name != "/":
raise utils.PdfReadError, "name read error"
while True:
tok = stream.read(1)
if tok.isspace() or tok in NameObject.delimiterCharacters:
stream.seek(-1, 1)
break
name += tok
return NameObject(name)
readFromStream = staticmethod(readFromStream)
class DictionaryObject(dict, PdfObject):
sweep_required = True
def __init__(self, *args, **kwargs):
if len(args) == 0:
self.update(kwargs)
elif len(args) == 1:
arr = args[0]
# If we're passed a list/tuple, make a dict out of it
if not hasattr(arr, "iteritems"):
newarr = {}
for k, v in arr:
newarr[k] = v
arr = newarr
self.update(arr)
else:
raise TypeError("dict expected at most 1 argument, got 3")
def update(self, arr):
# note, a ValueError halfway through copying values
# will leave half the values in this dict.
for k, v in arr.iteritems():
self.__setitem__(k, v)
def raw_get(self, key):
return dict.__getitem__(self, key)
def __setitem__(self, key, value):
if not isinstance(key, PdfObject):
raise ValueError("key must be PdfObject")
if not isinstance(value, PdfObject):
raise ValueError("value must be PdfObject")
return dict.__setitem__(self, key, value)
def setdefault(self, key, value=None):
if not isinstance(key, PdfObject):
raise ValueError("key must be PdfObject")
if not isinstance(value, PdfObject):
raise ValueError("value must be PdfObject")
return dict.setdefault(self, key, value)
def __getitem__(self, key):
return dict.__getitem__(self, key).getObject()
##
# Retrieves XMP (Extensible Metadata Platform) data relevant to the
# this object, if available.
# <p>
# Stability: Added in v1.12, will exist for all future v1.x releases.
# @return Returns a {@link #xmp.XmpInformation XmlInformation} instance
# that can be used to access XMP metadata from the document. Can also
# return None if no metadata was found on the document root.
def getXmpMetadata(self):
metadata = self.get("/Metadata", None)
if metadata == None:
return None
metadata = metadata.getObject()
import xmp
if not isinstance(metadata, xmp.XmpInformation):
metadata = xmp.XmpInformation(metadata)
self[NameObject("/Metadata")] = metadata
return metadata
##
# Read-only property that accesses the {@link
# #DictionaryObject.getXmpData getXmpData} function.
# <p>
# Stability: Added in v1.12, will exist for all future v1.x releases.
xmpMetadata = property(lambda self: self.getXmpMetadata(), None, None)
def writeToStream(self, stream, encryption_key):
stream.write("<<\n")
for key, value in self.items():
key.writeToStream(stream, encryption_key)
stream.write(" ")
value.writeToStream(stream, encryption_key)
stream.write("\n")
stream.write(">>")
def readFromStream(stream, pdf):
tmp = stream.read(2)
if tmp != "<<":
raise utils.PdfReadError, "dictionary read error"
data = {}
while True:
tok = readNonWhitespace(stream)
if tok == ">":
stream.read(1)
break
stream.seek(-1, 1)
key = readObject(stream, pdf)
tok = readNonWhitespace(stream)
stream.seek(-1, 1)
value = readObject(stream, pdf)
if data.has_key(key):
# multiple definitions of key not permitted
raise utils.PdfReadError, "multiple definitions in dictionary"
data[key] = value
pos = stream.tell()
s = readNonWhitespace(stream)
if s == 's' and stream.read(5) == 'tream':
eol = stream.read(1)
# odd PDF file output has spaces after 'stream' keyword but before EOL.
# patch provided by Danial Sandler
while eol == ' ':
eol = stream.read(1)
assert eol in ("\n", "\r")
if eol == "\r":
# read \n after
stream.read(1)
# this is a stream object, not a dictionary
assert data.has_key("/Length")
length = data["/Length"]
if isinstance(length, IndirectObject):
t = stream.tell()
length = pdf.getObject(length)
stream.seek(t, 0)
data["__streamdata__"] = stream.read(length)
e = readNonWhitespace(stream)
ndstream = stream.read(8)
if (e + ndstream) != "endstream":
# (sigh) - the odd PDF file has a length that is too long, so
# we need to read backwards to find the "endstream" ending.
# ReportLab (unknown version) generates files with this bug,
# and Python users into PDF files tend to be our audience.
# we need to do this to correct the streamdata and chop off
# an extra character.
pos = stream.tell()
stream.seek(-10, 1)
end = stream.read(9)
if end == "endstream":
# we found it by looking back one character further.
data["__streamdata__"] = data["__streamdata__"][:-1]
else:
stream.seek(pos, 0)
raise utils.PdfReadError, "Unable to find 'endstream' marker after stream."
else:
stream.seek(pos, 0)
if data.has_key("__streamdata__"):
return StreamObject.initializeFromDictionary(data)
else:
retval = DictionaryObject()
retval.update(data)
return retval
readFromStream = staticmethod(readFromStream)
class TreeObject(DictionaryObject):
def __init__(self):
DictionaryObject.__init__(self)
def hasChildren(self):
return self.has_key('/First')
def __iter__(self):
return self.children()
def children(self):
if not self.hasChildren():
raise StopIteration
child = self['/First']
while True:
yield child
if child == self['/Last']:
raise StopIteration
child = child['/Next']
def addChild(self, child, pdf):
childObj = child.getObject()
child = pdf.getReference(childObj)
assert isinstance(child, IndirectObject)
if not self.has_key('/First'):
self[NameObject('/First')] = child
self[NameObject('/Count')] = NumberObject(0)
prev = None
else:
prev = self['/Last']
self[NameObject('/Last')] = child
self[NameObject('/Count')] = NumberObject(self[NameObject('/Count')] + 1)
if prev:
prevRef = pdf.getReference(prev)
assert isinstance(prevRef, IndirectObject)
childObj[NameObject('/Prev')] = prevRef
prev[NameObject('/Next')] = child
parentRef = pdf.getReference(self)
assert isinstance(parentRef, IndirectObject)
childObj[NameObject('/Parent')] = parentRef
def removeChild(self, child):
childObj = child.getObject()
if not childObj.has_key(NameObject('/Parent')):
raise ValueError, "Removed child does not appear to be a tree item"
elif childObj[NameObject('/Parent')] != self:
raise ValueError, "Removed child is not a member of this tree"
found = False
prevRef = None
prev = None
curRef = self[NameObject('/First')]
cur = curRef.getObject()
lastRef = self[NameObject('/Last')]
last = lastRef.getObject()
while cur != None:
if cur == childObj:
if prev == None:
if cur.has_key(NameObject('/Next')):
# Removing first tree node
nextRef = cur[NameObject('/Next')]
next = nextRef.getObject()
del next[NameObject('/Prev')]
self[NameObject('/First')] = nextRef
self[NameObject('/Count')] = self[NameObject('/Count')] - 1
else:
# Removing only tree node
assert self[NameObject('/Count')] == 1
del self[NameObject('/Count')]
del self[NameObject('/First')]
if self.has_key(NameObject('/Last')):
del self[NameObject('/Last')]
else:
if cur.has_key(NameObject('/Next')):
# Removing middle tree node
nextRef = cur[NameObject('/Next')]
next = nextRef.getObject()
next[NameObject('/Prev')] = prevRef
prev[NameObject('/Next')] = nextRef
self[NameObject('/Count')] = self[NameObject('/Count')] - 1
else:
# Removing last tree node
assert cur == last
del prev[NameObject('/Next')]
self[NameObject('/Last')] = prevRef
self[NameObject('/Count')] = self[NameObject('/Count')] - 1
found = True
break
prevRef = curRef
prev = cur
if cur.has_key(NameObject('/Next')):
curRef = cur[NameObject('/Next')]
cur = curRef.getObject()
else:
curRef = None
cur = None
if not found:
raise ValueError, "Removal couldn't find item in tree"
del childObj[NameObject('/Parent')]
if childObj.has_key(NameObject('/Next')):
del childObj[NameObject('/Next')]
if childObj.has_key(NameObject('/Prev')):
del childObj[NameObject('/Prev')]
def emptyTree(self):
for child in self:
childObj = child.getObject()
del childObj[NameObject('/Parent')]
if childObj.has_key(NameObject('/Next')):
del childObj[NameObject('/Next')]
if childObj.has_key(NameObject('/Prev')):
del childObj[NameObject('/Prev')]
if self.has_key(NameObject('/Count')):
del self[NameObject('/Count')]
if self.has_key(NameObject('/First')):
del self[NameObject('/First')]
if self.has_key(NameObject('/Last')):
del self[NameObject('/Last')]
class StreamObject(DictionaryObject):
def __init__(self):
self._data = None
self.decodedSelf = None
def writeToStream(self, stream, encryption_key):
self[NameObject("/Length")] = NumberObject(len(self._data))
DictionaryObject.writeToStream(self, stream, encryption_key)
del self["/Length"]
stream.write("\nstream\n")
data = self._data
if encryption_key:
data = RC4_encrypt(encryption_key, data)
stream.write(data)
stream.write("\nendstream")
def initializeFromDictionary(data):
if data.has_key("/Filter"):
retval = EncodedStreamObject()
else:
retval = DecodedStreamObject()
retval._data = data["__streamdata__"]
del data["__streamdata__"]
del data["/Length"]
retval.update(data)
return retval
initializeFromDictionary = staticmethod(initializeFromDictionary)
def flateEncode(self):
if self.has_key("/Filter"):
f = self["/Filter"]
if isinstance(f, ArrayObject):
f.insert(0, NameObject("/FlateDecode"))
else:
newf = ArrayObject()
newf.append(NameObject("/FlateDecode"))
newf.append(f)
f = newf
else:
f = NameObject("/FlateDecode")
retval = EncodedStreamObject()
retval[NameObject("/Filter")] = f
retval._data = filters.FlateDecode.encode(self._data)
return retval
class DecodedStreamObject(StreamObject):
def getData(self):
return self._data
def setData(self, data):
self._data = data
class EncodedStreamObject(StreamObject):
def __init__(self):
self.decodedSelf = None
def getData(self):
if self.decodedSelf:
# cached version of decoded object
return self.decodedSelf.getData()
else:
# create decoded object
decoded = DecodedStreamObject()
decoded._data = filters.decodeStreamData(self)
for key, value in self.items():
if not key in ("/Length", "/Filter", "/DecodeParms"):
decoded[key] = value
self.decodedSelf = decoded
return decoded._data
def setData(self, data):
raise utils.PdfReadError, "Creating EncodedStreamObject is not currently supported"
class RectangleObject(ArrayObject):
def __init__(self, arr):
# must have four points
assert len(arr) == 4
# automatically convert arr[x] into NumberObject(arr[x]) if necessary
ArrayObject.__init__(self, [self.ensureIsNumber(x) for x in arr])
def ensureIsNumber(self, value):
if not isinstance(value, (NumberObject, FloatObject)):
value = FloatObject(value)
return value
def __repr__(self):
return "RectangleObject(%s)" % repr(list(self))
def getLowerLeft_x(self):
return self[0]
def getLowerLeft_y(self):
return self[1]
def getUpperRight_x(self):
return self[2]
def getUpperRight_y(self):
return self[3]
def getUpperLeft_x(self):
return self.getLowerLeft_x()
def getUpperLeft_y(self):
return self.getUpperRight_y()
def getLowerRight_x(self):
return self.getUpperRight_x()
def getLowerRight_y(self):
return self.getLowerLeft_y()
def getLowerLeft(self):
return self.getLowerLeft_x(), self.getLowerLeft_y()
def getLowerRight(self):
return self.getLowerRight_x(), self.getLowerRight_y()
def getUpperLeft(self):
return self.getUpperLeft_x(), self.getUpperLeft_y()
def getUpperRight(self):
return self.getUpperRight_x(), self.getUpperRight_y()
def setLowerLeft(self, value):
self[0], self[1] = [self.ensureIsNumber(x) for x in value]
def setLowerRight(self, value):
self[2], self[1] = [self.ensureIsNumber(x) for x in value]
def setUpperLeft(self, value):
self[0], self[3] = [self.ensureIsNumber(x) for x in value]
def setUpperRight(self, value):
self[2], self[3] = [self.ensureIsNumber(x) for x in value]
def getWidth(self):
return self.getUpperRight_x() - self.getLowerLeft_x()
def getHeight(self):
return self.getUpperRight_y() - self.getLowerLeft_x()
lowerLeft = property(getLowerLeft, setLowerLeft, None, None)
lowerRight = property(getLowerRight, setLowerRight, None, None)
upperLeft = property(getUpperLeft, setUpperLeft, None, None)
upperRight = property(getUpperRight, setUpperRight, None, None)
##
# A class representing a destination within a PDF file.
# See section 8.2.1 of the PDF 1.6 reference.
# Stability: Added in v1.10, will exist for all v1.x releases.
class Destination(TreeObject):
def __init__(self, title, page, typ, *args):
DictionaryObject.__init__(self)
self[NameObject("/Title")] = title
self[NameObject("/Page")] = page
self[NameObject("/Type")] = typ
# from table 8.2 of the PDF 1.6 reference.
if typ == "/XYZ":
(self[NameObject("/Left")], self[NameObject("/Top")],
self[NameObject("/Zoom")]) = args
elif typ == "/FitR":
(self[NameObject("/Left")], self[NameObject("/Bottom")],
self[NameObject("/Right")], self[NameObject("/Top")]) = args
elif typ in ["/FitH", "FitBH"]:
self[NameObject("/Top")], = args
elif typ in ["/FitV", "FitBV"]:
self[NameObject("/Left")], = args
elif typ in ["/Fit", "FitB"]:
pass
else:
raise utils.PdfReadError("Unknown Destination Type: %r" % typ)
def getDestArray(self):
return ArrayObject([self.raw_get('/Page'), self['/Type']] + [self[x] for x in ['/Left','/Bottom','/Right','/Top','/Zoom'] if self.has_key(x)])
def writeToStream(self, stream, encryption_key):
stream.write("<<\n")
key = NameObject('/D')
key.writeToStream(stream, encryption_key)
stream.write(" ")
value = self.getDestArray()
value.writeToStream(stream, encryption_key)
key = NameObject("/S")
key.writeToStream(stream, encryption_key)
stream.write(" ")
value = NameObject("/GoTo")
value.writeToStream(stream, encryption_key)
stream.write("\n")
stream.write(">>")
##
# Read-only property accessing the destination title.
# @return A string.
title = property(lambda self: self.get("/Title"))
##
# Read-only property accessing the destination page.
# @return An integer.
page = property(lambda self: self.get("/Page"))
##
# Read-only property accessing the destination type.
# @return A string.
typ = property(lambda self: self.get("/Type"))
##
# Read-only property accessing the zoom factor.
# @return A number, or None if not available.
zoom = property(lambda self: self.get("/Zoom", None))
##
# Read-only property accessing the left horizontal coordinate.
# @return A number, or None if not available.
left = property(lambda self: self.get("/Left", None))
##
# Read-only property accessing the right horizontal coordinate.
# @return A number, or None if not available.
right = property(lambda self: self.get("/Right", None))
##
# Read-only property accessing the top vertical coordinate.
# @return A number, or None if not available.
top = property(lambda self: self.get("/Top", None))
##
# Read-only property accessing the bottom vertical coordinate.
# @return A number, or None if not available.
bottom = property(lambda self: self.get("/Bottom", None))
class Bookmark(Destination):
def writeToStream(self, stream, encryption_key):
stream.write("<<\n")
for key in [NameObject(x) for x in ['/Title', '/Parent', '/First', '/Last', '/Next', '/Prev'] if self.has_key(x)]:
key.writeToStream(stream, encryption_key)
stream.write(" ")
value = self.raw_get(key)
value.writeToStream(stream, encryption_key)
stream.write("\n")
key = NameObject('/Dest')
key.writeToStream(stream, encryption_key)
stream.write(" ")
value = self.getDestArray()
value.writeToStream(stream, encryption_key)
stream.write("\n")
stream.write(">>")
def encode_pdfdocencoding(unicode_string):
retval = ''
for c in unicode_string:
try:
retval += chr(_pdfDocEncoding_rev[c])
except KeyError:
raise UnicodeEncodeError("pdfdocencoding", c, -1, -1,
"does not exist in translation table")
return retval
def decode_pdfdocencoding(byte_array):
retval = u''
for b in byte_array:
c = _pdfDocEncoding[ord(b)]
if c == u'\u0000':
raise UnicodeDecodeError("pdfdocencoding", b, -1, -1,
"does not exist in translation table")
retval += c
return retval
_pdfDocEncoding = (
u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000',
u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000',
u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000',
u'\u02d8', u'\u02c7', u'\u02c6', u'\u02d9', u'\u02dd', u'\u02db', u'\u02da', u'\u02dc',
u'\u0020', u'\u0021', u'\u0022', u'\u0023', u'\u0024', u'\u0025', u'\u0026', u'\u0027',
u'\u0028', u'\u0029', u'\u002a', u'\u002b', u'\u002c', u'\u002d', u'\u002e', u'\u002f',
u'\u0030', u'\u0031', u'\u0032', u'\u0033', u'\u0034', u'\u0035', u'\u0036', u'\u0037',
u'\u0038', u'\u0039', u'\u003a', u'\u003b', u'\u003c', u'\u003d', u'\u003e', u'\u003f',
u'\u0040', u'\u0041', u'\u0042', u'\u0043', u'\u0044', u'\u0045', u'\u0046', u'\u0047',
u'\u0048', u'\u0049', u'\u004a', u'\u004b', u'\u004c', u'\u004d', u'\u004e', u'\u004f',
u'\u0050', u'\u0051', u'\u0052', u'\u0053', u'\u0054', u'\u0055', u'\u0056', u'\u0057',
u'\u0058', u'\u0059', u'\u005a', u'\u005b', u'\u005c', u'\u005d', u'\u005e', u'\u005f',
u'\u0060', u'\u0061', u'\u0062', u'\u0063', u'\u0064', u'\u0065', u'\u0066', u'\u0067',
u'\u0068', u'\u0069', u'\u006a', u'\u006b', u'\u006c', u'\u006d', u'\u006e', u'\u006f',
u'\u0070', u'\u0071', u'\u0072', u'\u0073', u'\u0074', u'\u0075', u'\u0076', u'\u0077',
u'\u0078', u'\u0079', u'\u007a', u'\u007b', u'\u007c', u'\u007d', u'\u007e', u'\u0000',
u'\u2022', u'\u2020', u'\u2021', u'\u2026', u'\u2014', u'\u2013', u'\u0192', u'\u2044',
u'\u2039', u'\u203a', u'\u2212', u'\u2030', u'\u201e', u'\u201c', u'\u201d', u'\u2018',
u'\u2019', u'\u201a', u'\u2122', u'\ufb01', u'\ufb02', u'\u0141', u'\u0152', u'\u0160',
u'\u0178', u'\u017d', u'\u0131', u'\u0142', u'\u0153', u'\u0161', u'\u017e', u'\u0000',
u'\u20ac', u'\u00a1', u'\u00a2', u'\u00a3', u'\u00a4', u'\u00a5', u'\u00a6', u'\u00a7',
u'\u00a8', u'\u00a9', u'\u00aa', u'\u00ab', u'\u00ac', u'\u0000', u'\u00ae', u'\u00af',
u'\u00b0', u'\u00b1', u'\u00b2', u'\u00b3', u'\u00b4', u'\u00b5', u'\u00b6', u'\u00b7',
u'\u00b8', u'\u00b9', u'\u00ba', u'\u00bb', u'\u00bc', u'\u00bd', u'\u00be', u'\u00bf',
u'\u00c0', u'\u00c1', u'\u00c2', u'\u00c3', u'\u00c4', u'\u00c5', u'\u00c6', u'\u00c7',
u'\u00c8', u'\u00c9', u'\u00ca', u'\u00cb', u'\u00cc', u'\u00cd', u'\u00ce', u'\u00cf',
u'\u00d0', u'\u00d1', u'\u00d2', u'\u00d3', u'\u00d4', u'\u00d5', u'\u00d6', u'\u00d7',
u'\u00d8', u'\u00d9', u'\u00da', u'\u00db', u'\u00dc', u'\u00dd', u'\u00de', u'\u00df',
u'\u00e0', u'\u00e1', u'\u00e2', u'\u00e3', u'\u00e4', u'\u00e5', u'\u00e6', u'\u00e7',
u'\u00e8', u'\u00e9', u'\u00ea', u'\u00eb', u'\u00ec', u'\u00ed', u'\u00ee', u'\u00ef',
u'\u00f0', u'\u00f1', u'\u00f2', u'\u00f3', u'\u00f4', u'\u00f5', u'\u00f6', u'\u00f7',
u'\u00f8', u'\u00f9', u'\u00fa', u'\u00fb', u'\u00fc', u'\u00fd', u'\u00fe', u'\u00ff'
)
assert len(_pdfDocEncoding) == 256
_pdfDocEncoding_rev = {}
for i in xrange(256):
char = _pdfDocEncoding[i]
if char == u"\u0000":
continue
assert char not in _pdfDocEncoding_rev
_pdfDocEncoding_rev[char] = i