# A part of pdfrw (pdfrw.googlecode.com)
# Copyright (C) 2006-2012 Patrick Maupin, Austin, Texas
# MIT license -- See LICENSE.txt for details
|
from pdfrw.objects.pdfname import PdfName
|
|
from pdfrw.objects.pdfindirect import PdfIndirect
|
|
from pdfrw.objects.pdfobject import PdfObject
|
|
|
|
class _DictSearch(object):
    ''' Used to search for inheritable attributes.

        Wraps a dictionary-like object and answers lookups by walking
        up its chain of ``Parent`` references until one of the objects
        in the chain supplies a value that is not None.
    '''

    def __init__(self, basedict):
        ''' Remember the object at which every search starts.
        '''
        self.basedict = basedict

    def __getattr__(self, name, PdfName=PdfName):
        ''' Attribute-style lookup: pass the attribute name through
            PdfName and run the same search as item access.
        '''
        return self[PdfName(name)]

    def __getitem__(self, name, set=set, getattr=getattr, id=id):
        ''' Return the first non-None value found for name, looking at
            basedict first and then at each successive ``Parent``.

            Returns None when the chain ends (``Parent`` is None)
            without any object supplying a value.

            Raises AssertionError if the ``Parent`` chain loops back
            onto an object that was already searched.

            The set/getattr/id keyword defaults belong to the existing
            signature and are kept as-is; getattr is not used here.
        '''
        visited = set()
        mydict = self.basedict
        while True:
            value = mydict[name]
            if value is not None:
                return value
            myid = id(mydict)
            # Raised explicitly instead of using an assert statement:
            # asserts are stripped under "python -O", which would turn
            # a circular Parent chain into an endless loop.
            if myid in visited:
                raise AssertionError(
                    'circular Parent chain while searching for %r' % (name,))
            visited.add(myid)
            mydict = mydict.Parent
            if mydict is None:
                return None
|
|
|
|
class _Private(object):
|
|
''' Used to store private attributes (not output to PDF files)
|
|
on PdfDict classes
|
|
'''
|
|
def __init__(self, pdfdict):
|
|
vars(self)['pdfdict'] = pdfdict
|
|
def __setattr__(self, name, value):
|
|
vars(self.pdfdict)[name] = value
|
|
|
|
class PdfDict(dict):
    ''' PdfDict objects are subclassed dictionaries with the following features:

        - Every key in the dictionary starts with "/"

        - A dictionary item can be deleted by assigning it to None

        - Keys that (after the initial "/") conform to Python naming conventions
          can also be accessed (set and retrieved) as attributes of the dictionary.
          E.g.  mydict.Page is the same thing as mydict['/Page']

        - Private attributes (not in the PDF space) can be set on the dictionary
          object attribute dictionary by using the private attribute:

                mydict.private.foo = 3
                mydict.foo = 5
                x = mydict.foo       # x will now contain 3
                y = mydict['/foo']   # y will now contain 5

          Most standard adobe dictionary keys start with an upper case letter,
          so to avoid conflicts, it is best to start private attributes with
          lower case letters.

        - PdfDicts have the following read-only properties:

            - private -- as discussed above, provides write access to dictionary's
                         attributes
            - inheritable -- this creates and returns a "view" attribute that
                         will search through the object hierarchy for any desired
                         attribute, such as /Rotate or /MediaBox

        - PdfDicts also have the following special attributes:
            - indirect is not stored in the PDF dictionary, but in the object's
              attribute dictionary
            - stream is also stored in the object's attribute dictionary
              and will also update the stream length.
            - _stream will store in the object's attribute dictionary without
              updating the stream length.

            It is possible, for example, to have a PDF name such as "/indirect"
            or "/stream", but you cannot access such a name as an attribute:

                mydict.indirect -- accesses object's attribute dictionary
                mydict["/indirect"] -- accesses actual PDF dictionary
    '''

    # Class-level defaults for the special attributes; instances override
    # them through __setattr__, which stores into the attribute dictionary.
    indirect = False
    stream = None

    # Maps a special attribute name to (name used in the object's attribute
    # dictionary, whether assigning it also updates the Length entry).
    _special = dict(indirect=('indirect', False),
                    stream=('stream', True),
                    _stream=('stream', False),
                    )

    def __setitem__(self, name, value, setter=dict.__setitem__):
        ''' Store value under name.  Assigning None removes the key
            (when present) instead of storing it.

            name is expected to start with "/" (checked by assertion).
        '''
        assert name.startswith('/'), name
        if value is not None:
            setter(self, name, value)
        elif name in self:
            del self[name]

    def __init__(self, *args, **kw):
        ''' Build the dictionary from positional data and keywords.

            A single positional argument is passed to update() as-is;
            several positional arguments are passed to update() as one
            tuple.  When the single argument is itself a PdfDict, its
            indirect and stream attributes are copied as well (stream
            is copied through _stream, so Length is not recomputed).

            Keyword arguments are applied with setattr(), so they follow
            the same rules as attribute assignment on the instance.
        '''
        if args:
            if len(args) == 1:
                args = args[0]
            self.update(args)
            if isinstance(args, PdfDict):
                self.indirect = args.indirect
                self._stream = args.stream
        # kw is a plain dict; items() exists on both Python 2 and
        # Python 3, whereas iteritems() is Python 2 only.
        for key, value in kw.items():
            setattr(self, key, value)

    def __getattr__(self, name, PdfName=PdfName):
        ''' If the attribute doesn't exist on the dictionary object,
            try to slap a '/' in front of it and get it out
            of the actual dictionary itself.

            Returns None when the dictionary has no such key.
        '''
        return self.get(PdfName(name))

    def get(self, key, dictget=dict.get, isinstance=isinstance,
            PdfIndirect=PdfIndirect):
        ''' Get a value out of the dictionary, after resolving any
            indirect objects.

            A stored PdfIndirect is replaced in the dictionary by the
            result of its real_value() call, and that result is
            returned.  Returns None when key is absent.
        '''
        value = dictget(self, key)
        if isinstance(value, PdfIndirect):
            # Store the resolved object back so the next access finds it
            # directly; a None result removes the key (see __setitem__).
            self[key] = value = value.real_value()
        return value

    def __getitem__(self, key):
        ''' Item access behaves like get(): a missing key yields None
            rather than raising KeyError.
        '''
        return self.get(key)

    def __setattr__(self, name, value, special=_special.get,
                    PdfName=PdfName, vars=vars):
        ''' Set an attribute on the dictionary.  Handle the keywords
            indirect, stream, and _stream specially (for content objects)

            Any other name is converted with PdfName and stored as a
            dictionary item.  Assigning stream also sets the Length
            item from len(value), or removes it when value is None.
        '''
        info = special(name)
        if info is None:
            self[PdfName(name)] = value
            return
        name, setlen = info
        vars(self)[name] = value
        if setlen:
            # Conditional expression instead of "x and y or None", which
            # would also yield None if the PdfObject itself were falsy.
            self.Length = (PdfObject(len(value))
                           if value is not None else None)

    def iteritems(self, dictiter=getattr(dict, 'iteritems', dict.items),
                  isinstance=isinstance, PdfIndirect=PdfIndirect):
        ''' Iterate over the dictionary, resolving any unresolved objects

            Yields (key, value) pairs; entries whose resolved value is
            None are skipped.
        '''
        # The default for dictiter falls back to dict.items where
        # dict.iteritems does not exist (Python 3).
        # Snapshot the entries first: storing a resolved value back
        # (or deleting the key when it resolves to None) would otherwise
        # modify the dictionary while it is being iterated.
        for key, value in list(dictiter(self)):
            if isinstance(value, PdfIndirect):
                self[key] = value = value.real_value()
            if value is not None:
                assert key.startswith('/'), (key, value)
                yield key, value

    def items(self):
        ''' Return the resolved (key, value) pairs as a list.
        '''
        return list(self.iteritems())

    def itervalues(self):
        ''' Iterate over the resolved values.
        '''
        for key, value in self.iteritems():
            yield value

    def values(self):
        ''' Return the resolved values as a list.
        '''
        return [value for key, value in self.iteritems()]

    def keys(self):
        ''' Return, as a list, the keys whose resolved value is not None.
        '''
        return [key for key, value in self.iteritems()]

    def __iter__(self):
        ''' Iterate over the keys whose resolved value is not None.
        '''
        for key, value in self.iteritems():
            yield key

    def iterkeys(self):
        ''' Same as iterating over the dictionary itself.
        '''
        return iter(self)

    def copy(self):
        ''' Return a new object of the same class built from this one.
        '''
        return type(self)(self)

    def pop(self, key):
        ''' Remove key and return its resolved value.

            Raises KeyError when key is not in the dictionary.
        '''
        value = self.get(key)
        del self[key]
        return value

    def popitem(self):
        ''' Remove an arbitrary entry and return it as a (key, value)
            pair, resolving the value if it is a PdfIndirect.

            Raises KeyError when the dictionary is empty.
        '''
        # dict.popitem is the call that takes no key; dict.pop(self)
        # always failed with TypeError because pop requires a key.
        key, value = dict.popitem(self)
        if isinstance(value, PdfIndirect):
            value = value.real_value()
        return key, value

    @property
    def inheritable(self):
        ''' Search through ancestors as needed for inheritable
            dictionary items.
            NOTE:  You might think it would be a good idea
            to cache this class, but then you'd have to worry
            about it pointing to the wrong dictionary if you
            made a copy of the object...
        '''
        return _DictSearch(self)

    @property
    def private(self):
        ''' Allows setting private metadata for use in
            processing (not sent to PDF file).
            A new helper object is created on every access rather than
            cached, so a copy of this dictionary can never end up with
            a helper that still points at the original object.
        '''
        return _Private(self)
|
|
|
|
class IndirectPdfDict(PdfDict):
    ''' Convenience subclass of PdfDict whose indirect attribute
        defaults to True.

        Creating an IndirectPdfDict gives the same result as creating
        a direct PdfDict and then setting indirect = True on it.
    '''

    # Class-level default; the only difference from PdfDict.
    indirect = True
|