# debian-pdfrw/pdfrw/objects/pdfdict.py

# A part of pdfrw (pdfrw.googlecode.com)
# Copyright (C) 2006-2012 Patrick Maupin, Austin, Texas
# MIT license -- See LICENSE.txt for details

from pdfrw.objects.pdfname import PdfName
from pdfrw.objects.pdfindirect import PdfIndirect
from pdfrw.objects.pdfobject import PdfObject

class _DictSearch(object):
    '''  Used to search for inheritable attributes.

         Wraps a dictionary and looks a key up first in that dictionary
         and then, for as long as the value found is None, in each
         dictionary reached by following the Parent attribute.
    '''
    def __init__(self, basedict):
        self.basedict = basedict

    def __getattr__(self, name, PdfName=PdfName):
        ''' Attribute access is shorthand for item access: the attribute
            name is converted with PdfName() and looked up via self[...].
        '''
        return self[PdfName(name)]

    def __getitem__(self, name, set=set, getattr=getattr, id=id):
        ''' Return the first non-None value stored under name, starting
            at basedict and walking up through successive Parent links.
            Returns None when the chain ends without a match.

            Raises AssertionError if the same dictionary is reached
            twice, i.e. the Parent links form a cycle.

            The set/getattr/id parameters are pre-bound defaults kept
            for signature compatibility (getattr is unused here).
        '''
        visited = set()
        mydict = self.basedict
        while True:
            value = mydict[name]
            if value is not None:
                return value
            myid = id(mydict)
            # Raise explicitly rather than use an assert statement:
            # asserts are removed under "python -O", which would turn a
            # cyclic Parent chain into an endless loop.
            if myid in visited:
                raise AssertionError(
                    'cycle detected in Parent chain while searching for %r'
                    % (name,))
            visited.add(myid)
            mydict = mydict.Parent
            if mydict is None:
                return None

class _Private(object):
    ''' Used to store private attributes (not output to PDF files)
        on PdfDict classes
    '''
    def __init__(self, pdfdict):
        vars(self)['pdfdict'] = pdfdict
    def __setattr__(self, name, value):
        vars(self.pdfdict)[name] = value

class PdfDict(dict):
    ''' PdfDict objects are subclassed dictionaries with the following features:

        - Every key in the dictionary starts with "/"

        - A dictionary item can be deleted by assigning it to None

        - Keys that (after the initial "/") conform to Python naming conventions
          can also be accessed (set and retrieved) as attributes of the dictionary.
          E.g.  mydict.Page is the same thing as mydict['/Page']

        - Private attributes (not in the PDF space) can be set on the dictionary
          object attribute dictionary by using the private attribute:

                mydict.private.foo = 3
                mydict.foo = 5
                x = mydict.foo       # x will now contain 3
                y = mydict['/foo']   # y will now contain 5

          Most standard Adobe dictionary keys start with an upper case letter,
          so to avoid conflicts, it is best to start private attributes with
          lower case letters.

        - PdfDicts have the following read-only properties:

            - private -- as discussed above, provides write access to dictionary's
                         attributes
            - inheritable -- this creates and returns a "view" attribute that
                         will search through the object hierarchy for any desired
                         attribute, such as /Rotate or /MediaBox

        - PdfDicts also have the following special attributes:
            - indirect is not stored in the PDF dictionary, but in the object's
              attribute dictionary
            - stream is also stored in the object's attribute dictionary
              and will also update the stream length.
            - _stream will store in the object's attribute dictionary without
              updating the stream length.

            It is possible, for example, to have a PDF name such as "/indirect"
            or "/stream", but you cannot access such a name as an attribute:

                mydict.indirect -- accesses object's attribute dictionary
                mydict["/indirect"] -- accesses actual PDF dictionary

        - Missing keys read as None (through item access, attribute access
          and get()) rather than raising KeyError.
    '''
    # Class-level defaults for the special attributes; instances override
    # them through their own attribute dictionary (see __setattr__).
    indirect = False
    stream = None

    # Maps a special attribute name to (name used in the instance attribute
    # dictionary, whether assigning it also updates the Length entry).
    _special = dict(indirect=('indirect', False),
                    stream=('stream', True),
                    _stream=('stream', False),
                    )

    def __setitem__(self, name, value, setter=dict.__setitem__):
        ''' Store value under name.  Assigning None removes the key
            instead (and does nothing if the key is not present).
        '''
        assert name.startswith('/'), name
        if value is not None:
            setter(self, name, value)
        elif name in self:
            del self[name]

    def __init__(self, *args, **kw):
        ''' Build the dictionary from positional data (anything accepted
            by dict.update) and treat keyword arguments as attribute
            assignments, so they go through __setattr__.

            When the single positional argument is itself a PdfDict, its
            indirect and stream attributes are copied as well.
        '''
        if args:
            if len(args) == 1:
                args = args[0]
            self.update(args)
            if isinstance(args, PdfDict):
                self.indirect = args.indirect
                # _stream copies the stream without touching Length,
                # which update() has already copied if it was present.
                self._stream = args.stream
        for key, value in kw.iteritems():
            setattr(self, key, value)

    def __getattr__(self, name, PdfName=PdfName):
        ''' If the attribute doesn't exist on the dictionary object,
            try to slap a '/' in front of it and get it out
            of the actual dictionary itself.
        '''
        return self.get(PdfName(name))

    def get(self, key, dictget=dict.get, isinstance=isinstance, PdfIndirect=PdfIndirect):
        ''' Get a value out of the dictionary, after resolving any indirect objects.

            Returns None when the key is missing.  A resolved indirect
            object is written back into the dictionary in place of the
            reference.

            NOTE: unlike dict.get there is no default parameter; the
            parameters after key are pre-bound defaults and do not look
            intended to be supplied by callers.
        '''
        value = dictget(self, key)
        if isinstance(value, PdfIndirect):
            self[key] = value = value.real_value()
        return value

    def __getitem__(self, key):
        ''' Item access behaves like get(): missing keys yield None. '''
        return self.get(key)

    def __setattr__(self, name, value, special=_special.get, PdfName=PdfName, vars=vars):
        ''' Set an attribute on the dictionary.  Handle the keywords
            indirect, stream, and _stream specially (for content objects):
            they are stored in the object's attribute dictionary, and
            assigning stream also sets (or, for None, removes) Length.
            Every other name is stored in the PDF dictionary itself.
        '''
        info = special(name)
        if info is None:
            self[PdfName(name)] = value
            return
        name, setlen = info
        vars(self)[name] = value
        if setlen:
            # Explicit branches instead of the "cond and a or b" idiom,
            # which yields the wrong result whenever "a" is falsy.
            if value is None:
                self.Length = None
            else:
                self.Length = PdfObject(len(value))

    def iteritems(self, dictiter=dict.iteritems, isinstance=isinstance, PdfIndirect=PdfIndirect):
        ''' Iterate over the dictionary, resolving any unresolved objects.
            Entries whose value is None are skipped.
        '''
        # Iterate over a snapshot: writing a resolved value back may
        # delete the key (when it resolves to None) while we are looping.
        for key, value in list(dictiter(self)):
            if isinstance(value, PdfIndirect):
                self[key] = value = value.real_value()
            if value is not None:
                assert key.startswith('/'), (key, value)
                yield key, value

    def items(self):
        ''' Return a list of resolved (key, value) pairs. '''
        return list(self.iteritems())

    def itervalues(self):
        ''' Iterate over the resolved values. '''
        for key, value in self.iteritems():
            yield value

    def values(self):
        ''' Return a list of the resolved values. '''
        return [value for key, value in self.iteritems()]

    def keys(self):
        ''' Return a list of the keys whose resolved value is not None. '''
        return [key for key, value in self.iteritems()]

    def __iter__(self):
        ''' Iterate over the keys whose resolved value is not None. '''
        for key, value in self.iteritems():
            yield key

    def iterkeys(self):
        ''' Same as iterating over the dictionary itself. '''
        return iter(self)

    def copy(self):
        ''' Return a shallow copy of the same type as this object. '''
        return type(self)(self)

    def pop(self, key):
        ''' Remove key and return its resolved value.
            Raises KeyError if the key is not present.
        '''
        value = self.get(key)
        del self[key]
        return value

    def popitem(self):
        ''' Remove and return an arbitrary (key, value) pair, resolving
            the value first if it is an indirect object.
            Raises KeyError if the dictionary is empty.
        '''
        # dict.popitem is the call that takes no key; dict.pop(self)
        # without a key always raises TypeError.
        key, value = dict.popitem(self)
        if isinstance(value, PdfIndirect):
            value = value.real_value()
        return key, value

    @property
    def inheritable(self):
        ''' Search through ancestors as needed for inheritable
            dictionary items.
            NOTE:  You might think it would be a good idea
            to cache this class, but then you'd have to worry
            about it pointing to the wrong dictionary if you
            made a copy of the object...
        '''
        return _DictSearch(self)

    @property
    def private(self):
        ''' Allows setting private metadata for use in
            processing (not sent to PDF file).
            A new helper object is created on every access rather than
            cached, for the same reason as given for inheritable.
        '''
        return _Private(self)

class IndirectPdfDict(PdfDict):
    ''' Convenience subclass of PdfDict whose indirect attribute
        defaults to True.  Creating one is equivalent to creating a
        direct PdfDict and then setting indirect = True on it.
    '''
    indirect = True