1735 lines
44 KiB
Python
1735 lines
44 KiB
Python
# pyatom.py -- PyAtom library module

"""
PyAtom

Module to make it really easy to create Atom syndication feeds.

This module is Copyright (C) 2006 Steve R. Hastings.
Licensed under the Academic Free License version 2.1

You might want to start with the test cases at the end; see how they
work, and then go back and look at the code in the module.

I hope you find this useful!

Steve R. Hastings

Please send your questions or comments to this email address:

pyatom@langri.com
"""
|
|
|
|
|
|
|
|
import re
|
|
import sys
|
|
import time
|
|
|
|
# Library identification strings.
s_pyatom_name, s_pyatom_ver = "PyAtom", "0.3.9"
s_pyatom_name_ver = "%s version %s" % (s_pyatom_name, s_pyatom_ver)

# string constants
# These string values are used in more than one place.

# Attribute names that appear in the XML declaration.
s_version, s_encoding, s_standalone = "version", "encoding", "standalone"

# Attribute and tag names shared by several elements.
s_href = "href"
s_lang = "xml:lang"
s_link = "link"
s_term = "term"
s_type = "type"
|
|
|
|
|
|
|
|
def set_s_indent(s):
    """
    Choose the string PyAtom uses to indent its output.

    Rebinds two module-level globals:

    s_indent        the string that indents one level (the argument s)
    s_indent_big    s repeated 256 times, kept so that indent strings
                    can be taken from it as slices

    This is called once below with a tab, which is therefore the default.
    """
    global s_indent, s_indent_big
    s_indent, s_indent_big = s, s * 256


# Establish the default indent (one tab per level) at import time.
set_s_indent("\t")
|
|
|
|
|
|
|
|
class TFC(object):
    """
    class TFC: Tag Format Control.
    Controls how tags are converted to strings.

    Arguments to __init__():
        level   Specifies what indent level to start at for output.
                Default 0.
        mode    Specifies how to format the output:
                    mode_terse -- minimal output (no indenting)
                    mode_normal -- default
                    mode_verbose -- normal, plus some XML comments

    Normally, if an XML item has no data, nothing is printed, but with
    mode_verbose you may get a comment like "Collection with 0 entries".

    Methods:
        b_print_all()
            Return True if TFC set for full printing.
        b_print_terse()
            Return True if TFC set for terse printing.
        b_print_verbose()
            Return True if TFC set for verbose printing.

        indent_by(incr)
            Return a TFC instance that indents by incr more levels.
        s_indent(extra_indent=0)
            Return an indent string.
    """
    mode_terse, mode_normal, mode_verbose = range(3)

    def __init__(self, level=0, mode=mode_normal):
        """
        Arguments:
            level   Specifies what indent level to start at for output.
                    Default 0.
            mode    One of TFC.mode_terse, TFC.mode_normal (the default)
                    or TFC.mode_verbose; see the class docstring.
        """
        self.level = level
        self.mode = mode

    def b_print_all(self):
        """
        Return True if TFC set for full printing.

        Some optional things are usually suppressed, but will be printed
        if the current level is 0.  And everything gets printed when
        mode_verbose is set.
        """
        return self.level == 0 or self.mode == TFC.mode_verbose

    def b_print_terse(self):
        """
        Return True if TFC set for terse printing.
        """
        return self.mode == TFC.mode_terse

    def b_print_verbose(self):
        """
        Return True if TFC set for verbose printing.
        """
        return self.mode == TFC.mode_verbose

    def indent_by(self, incr):
        """
        Return a new TFC, same mode, whose level is incr levels deeper.

        Pass this to a function that takes a TFC to get a temporary indent.
        """
        return TFC(self.level + incr, self.mode)

    def s_indent(self, extra_indent=0):
        """
        Return an indent string.

        Return a string of white space that indents correctly for the
        current TFC settings.  If specified, extra_indent will be added
        to the current indent level.  Terse mode never indents, and a
        level of zero or less yields an empty string.
        """
        if self.mode == TFC.mode_terse:
            return ""

        level = self.level + extra_indent
        if level <= 0:
            return ""

        # Inside a method body the bare names s_indent and s_indent_big
        # are the module globals maintained by set_s_indent(), not this
        # method.  The slice length must be counted in characters, so one
        # level is len(s_indent) characters wide; slicing by the level
        # alone is only right for a one-character indent string.
        width = level * len(s_indent)
        if width <= len(s_indent_big):
            return s_indent_big[0:width]
        # Deeper than the precomputed string: build the indent directly.
        return s_indent * level
|
|
|
|
|
|
|
|
# Matches the HTML non-breaking-space entity.
pat_nbsp = re.compile(r'&nbsp;')


def s_entities_to_ws(s):
    """
    Return a copy of s with HTML whitespace entities replaced by a space.

    Currently just gets rid of HTML non-breaking spaces ("&nbsp;").
    A false value (empty string or None) is returned unchanged.
    """
    if not s:
        return s

    return pat_nbsp.sub(" ", s)
|
|
|
|
def s_normalize_ws(s):
    r"""
    Collapse every run of whitespace in s into a single space.

    Leading and trailing whitespace is dropped as well, because the
    string is split on whitespace and the pieces are re-joined.

    >>> s_normalize_ws("and now\n\n\nfor \t something\v completely\r\n different")
    'and now for something completely different'
    """
    return " ".join(s.split())
|
|
|
|
|
|
def s_escape_html(s):
    """
    Return a copy of string s with HTML codes escaped.

    This is useful when you want HTML tags printed literally, rather than
    interpreted.  The characters "&", "<" and ">" are replaced by the
    entities "&amp;", "&lt;" and "&gt;".

    >>> s_escape_html("<head>")
    '&lt;head&gt;'
    >>> s_escape_html("&nbsp;")
    '&amp;nbsp;'
    """
    # The ampersand must be handled first; otherwise the ampersands
    # introduced by the other two replacements would be escaped again.
    s = s.replace("&", "&amp;")
    s = s.replace("<", "&lt;")
    s = s.replace(">", "&gt;")
    return s
|
|
|
|
def s_create_atom_id(t, domain_name, uri=""):
    """
    Build an Atom ID string of the form "tag:<domain>,<date>:<uri>".

    Follows Mark Pilgrim's recipe, described here:
    http://diveintomark.org/archives/2004/05/28/howto-atom-id

    t            a time tuple, as accepted by time.strftime()
    domain_name  the domain name to put in the tag
    uri          the path part; when it is the empty string, a path of
                 "/weblog/" followed by the full timestamp is used

    Every "#" in the finished string is replaced by "/".
    """
    if uri == "":
        # No path given: derive one from the timestamp,
        # for example /weblog/20031213083000
        uri = "/weblog/" + time.strftime("%Y%m%d%H%M%S", t)

    # The date part looks like 2003-12-13
    day = time.strftime("%Y-%m-%d", t)

    tag = "tag:%s,%s:%s" % (domain_name, day, uri)
    return tag.replace("#", "/")
|
|
|
|
# Templates for the copyright notice.  They are looked up each time
# s_copyright() runs, so rebinding them localizes the message.
s_copyright_multiyear = "Copyright %s %d-%d by %s."
s_copyright_oneyear = "Copyright %s %d by %s."


def s_copyright(s_owner, s_csym="(C)", end_year=None, start_year=None):
    """
    Return a string with a copyright notice.

    s_owner
        string with copyright owner's name.
    s_csym
        string with copyright symbol.  (An HTML entity might be good here.)
    end_year
        last year of the copyright.  Default is the current year.
    start_year
        first year of the copyright.

    Without start_year the notice names a single year (end_year); with
    start_year it names the range start_year-end_year.

    To localize the entire copyright message into another language, change
    the global variables with the copyright template:
        s_copyright_multiyear: for a year range
        s_copyright_oneyear: for a single year
    """
    last_year = end_year
    if not last_year:
        last_year = time.localtime().tm_year

    if not start_year:
        return s_copyright_oneyear % (s_csym, last_year, s_owner)

    return s_copyright_multiyear % (s_csym, start_year, last_year, s_owner)
|
|
|
|
|
|
|
|
# Here are all of the possible XML items.
#
# Supported by PyAtom:
#   XML Declaration: <?xml ... ?>
#   Comments: <!-- ... -->
#   Elements: <tag_name>...</tag_name>
#
# Minimal support:
#   Markup Declarations: <!KEYWORD ... >
#   Processing Instructions (PIs): <?KEYWORD ... ?>
#
# Not currently supported:
#   INCLUDE and IGNORE directives: <!KEYWORD[ ... ]]>
#   CDATA sections: <![CDATA[ ... ]]>
#
|
|
|
|
class XMLItem(object):
    """
    All PyAtom classes inherit from this class.  All it does is provide a
    few default methods, and be a root for the inheritance tree.

    An XMLItem has several methods that return an XML tag representation of
    its contents.  Each XMLItem knows how to make a tag for itself.  An
    XMLItem that contains other XMLItems will ask each one to make a tag;
    so asking the top-level XMLItem for a tag will cause the entire tree
    of XMLItems to recursively make tags, and you get a full XML
    representation with tags appropriately nested and indented.

    The methods here read self._parent and self._name, and level() calls
    is_element() on each ancestor; this base class defines none of them,
    so child classes must provide them.
    """
    def _s_tag(self, tfc):
        """
        A stub which must always be overridden by child classes.

        tfc is the TFC instance that controls indenting and verbosity.
        """
        # Raised explicitly rather than with "assert False", so the stub
        # still fails (with the same exception type) when Python runs
        # with -O and assert statements are removed.
        raise AssertionError("XMLItem instance is too abstract to print.")

    def s_tag(self, level):
        """
        Return the item as a string containing an XML tag declaration.

        The XML tag will be indented, starting at the given level.
        Will return an empty string if the item is empty.
        """
        tfc = TFC(level, TFC.mode_normal)
        return self._s_tag(tfc)

    def s_tag_verbose(self, level):
        """
        Return the item as a string containing an XML tag declaration.

        The XML tag will be indented, starting at the given level.
        May return an XML Comment if the item is empty.
        """
        tfc = TFC(level, TFC.mode_verbose)
        return self._s_tag(tfc)

    def s_tag_terse(self, level):
        """
        Return the item as a string containing an XML tag declaration.

        The XML tag will not be indented.
        Will return an empty string if the item is empty.
        """
        tfc = TFC(level, TFC.mode_terse)
        return self._s_tag(tfc)

    def __str__(self):
        """Return the normal-mode tag string, starting at level 0."""
        return self.s_tag(0)

    def level(self):
        """
        Return an integer describing what level this tag is.

        The root tag of an XML document is level 0; document-level comments
        or other document-level declarations are also level 0.  Tags nested
        inside the root tag are level 1, tags nested inside those tags are
        level 2, and so on.

        The level is the number of ancestors, found by following _parent,
        whose is_element() returns true.

        This is currently only used by the s_tree() functions.  When
        printing tags normally, the code that walks the tree keeps track of
        what level is current.
        """
        level = 0
        item = self
        while item._parent is not None:
            item = item._parent
            if item.is_element():
                level += 1
        return level

    def s_name(self):
        """
        Return a name for the current item.

        This is self._name when it is set, otherwise a placeholder built
        from the class name.

        Used only by the s_tree() functions.
        """
        if self._name:
            return self._name
        return "unnamed_instance_of_" + type(self).__name__

    def s_tree(self):
        """
        Return a verbose tree showing the current tag and its children.

        This is for debugging; it's not valid XML syntax.
        """
        level = self.level()
        return "%2d) %s\t%s" % (level, self.s_name(), str(self))
|
|
|
|
|
|
|
|
class DocItem(XMLItem):
    """
    Marker base class for document-level XML items, meaning items that
    may appear above the root element.

    It adds nothing to XMLItem; classes inherit from it so they can be
    recognized as document-level items.
    """
|
|
|
|
|
|
|
|
class ElementItem(XMLItem):
    """
    Marker base class for items that may be nested inside an element.

    It adds nothing to XMLItem; classes inherit from it so they can be
    recognized as nestable items.
    """
|
|
|
|
|
|
|
|
class Comment(DocItem,ElementItem):
    """
    An XML comment.

    Attributes:
        text
            the text of the comment
    """
    def __init__(self, text=""):
        """
        text: the text of the comment
        """
        self._parent = None
        self._name = ""
        self.tag_name = "comment"
        self.text = text

    def _s_tag(self, tfc):
        """
        Return the comment as a string, formatted as directed by tfc.

        An empty comment gives "<!-- -->" when tfc asks for everything
        to be printed, and an empty string otherwise.  Text containing a
        newline is put on its own lines between the comment delimiters.
        """
        if not self:
            if tfc.b_print_all():
                return tfc.s_indent() + "<!-- -->"
            return ""

        indent = tfc.s_indent()
        if "\n" in self.text:
            return "\n".join([indent + "<!--", self.text, indent + "-->"])

        return "%s%s%s%s" % (indent, "<!-- ", self.text, " -->")

    def __nonzero__(self):
        # True exactly when there is some comment text.
        return bool(self.text)

    def is_element(self):
        return True
|
|
|
|
|
|
|
|
# REVIEW: can a PI be an ElementItem?
|
|
class PI(DocItem):
    """
    XML Processing Instruction (PI).

    Attributes:
        keyword
        text
    """
    def __init__(self):
        self._parent = None
        self._name = ""
        self.keyword = ""
        self.text = ""

    def _s_tag(self, tfc):
        """
        Return the PI as a string, formatted as directed by tfc.

        A PI without a keyword gives an empty string.  Text containing a
        newline is put on its own lines between the opening "<?keyword"
        and the closing "?>".
        """
        if not self:
            return ""

        indent = tfc.s_indent()
        if "\n" in self.text:
            opening = "%s%s%s" % (indent, "<?", self.keyword)
            closing = "%s%s" % (indent, "?>")
            return "\n".join([opening, self.text, closing])

        return "%s%s%s %s%s" % (indent, "<?", self.keyword, self.text, "?>")

    def __nonzero__(self):
        # True exactly when a keyword has been set.
        return bool(self.keyword)
|
|
|
|
|
|
|
|
# REVIEW: can a MarkupDecl be an ElementItem?
|
|
class MarkupDecl(DocItem):
    """
    XML Markup Declaration.

    Attributes:
        keyword
        text
    """
    def __init__(self):
        self._parent = None
        self._name = ""
        self.keyword = ""
        self.text = ""

    def _s_tag(self, tfc):
        """
        Return the declaration as a string, formatted as directed by tfc.

        A declaration without a keyword gives an empty string.  Text
        containing a newline is put on its own lines between the opening
        "<!keyword" and the closing ">".
        """
        if not self:
            return ""

        indent = tfc.s_indent()
        if "\n" in self.text:
            opening = "%s%s%s" % (indent, "<!", self.keyword)
            closing = "%s%s" % (indent, ">")
            return "\n".join([opening, self.text, closing])

        return "%s%s%s %s%s" % (indent, "<!", self.keyword, self.text, ">")

    def __nonzero__(self):
        # True exactly when a keyword has been set.
        return bool(self.keyword)
|
|
|
|
|
|
|
|
class CoreElement(ElementItem):
    """
    This is an abstract class.

    All of the XML element classes inherit from this.

    Attributes set by __init__():
        lock        False while members are being created; once it is
                    True, __setattr__() checks every assignment
        tag_name    the name used in the start and end tags
        def_attr    the name of the default attribute
        attrs       dictionary of attribute names and their values
        attr_names  attribute names, in the order they should be printed
    """
    def __init__(self, tag_name, def_attr, def_attr_value, attr_names=None):
        """
        tag_name        the tag name for this element
        def_attr        name of the default attribute
        def_attr_value  value for the default attribute; it is stored in
                        attrs only when both it and def_attr are true
        attr_names      sequence of attribute names giving print order
        """
        self.lock = False
        self._parent = None
        self._name = ""
        self.tag_name = tag_name
        self.def_attr = def_attr
        # dictionary of attributes and their values
        self.attrs = {}
        if def_attr and def_attr_value:
            self.attrs[def_attr] = def_attr_value
        # Keep a private copy, so that no two elements (and no default
        # argument list of a subclass) ever share one mutable list.
        if attr_names is None:
            attr_names = []
        self.attr_names = list(attr_names)
        self.lock = True

    def __nonzero__(self):
        # Returns True if any attrs are set or there are any contents.
        # Returns False otherwise.
        return bool(self.attrs) or self.has_contents()

    def text_check(self):
        """
        Raise an exception, unless element has text contents.

        Child classes that have text must override this to do nothing.
        """
        raise TypeError("element does not have text contents")

    def nest_check(self):
        """
        Raise an exception, unless element can nest other elements.

        Child classes that can nest must override this to do nothing.
        """
        raise TypeError("element cannot nest other elements")

    def __delattr__(self, name):
        # REVIEW: this should be made to work!
        raise TypeError("cannot delete elements")

    def __getattr__(self, name):
        # Python only calls this when normal attribute lookup has failed.
        if name == "lock":
            # If the "lock" hasn't been created yet, we always want it
            # to be False, i.e. we are not locked.
            return False
        raise AttributeError(name)

    def __setattr__(self, name, value):
        # Here's how this works:
        #
        # 0) "self.lock" is a boolean, set to False during __init__()
        # but turned True afterwards.  When it's False, you can add new
        # members to the class instance without any sort of checks; once
        # it's set True, __setattr__() starts checking assignments.
        # By default, when lock is True, you cannot add a new member to
        # the class instance, and any assignment to an old member has to
        # be of matching type.  So if you say "a.text = string", the
        # .text member has to exist and be a string member.
        #
        # This is the default __setattr__() for all element types.  It
        # gets overloaded by the __setattr__() in Nest, because for
        # nested elements, it makes sense to be able to add new
        # elements nested inside.
        #
        # 1) This checks assignments to _parent, and makes sure they are
        # plausible (either an XMLItem, or None).

        try:
            lock = self.lock
        except AttributeError:
            lock = False

        members = self.__dict__

        if not lock:
            members[name] = value
            return

        if name not in members:
            # brand-new item, which a locked element does not accept
            raise TypeError("element cannot nest other elements")

        if name == "_parent":
            if not (isinstance(value, XMLItem) or value is None):
                raise TypeError("only XMLItem or None is permitted")
            members[name] = value
            return

        # locked item so do checks
        if type(members[name]) is not type(value):
            raise TypeError("value is not the same type")

        members[name] = value

    def has_contents(self):
        return False

    def multiline_contents(self):
        return False

    def s_contents(self, tfc):
        assert False, "CoreElement is an abstract class; it has no contents."

    def _s_start_tag_name_attrs(self, tfc):
        """
        Return a string with the start tag name, and any attributes.

        Wrap this in correct punctuation to get a start tag.

        A single attribute goes on the same line as the tag name.  Two or
        more attributes each go on their own line (or are separated by a
        space in terse mode): first those listed in attr_names, in that
        order, then any others.
        """
        # NOTE(review): attribute values are inserted verbatim; nothing
        # here escapes quotes or markup characters inside a value.
        def attr_newline(tfc):
            if tfc.b_print_terse():
                return " "
            return "\n" + tfc.s_indent(2)

        lst = [self.tag_name]

        n_attrs = len(self.attrs)
        if n_attrs == 1:
            # just one attr so do on one line
            for attr in self.attrs:
                s_attr = '%s="%s"' % (attr, self.attrs[attr])
                lst.append(" " + s_attr)
        elif n_attrs > 1:
            # more than one attr so do a nice nested tag
            sep = attr_newline(tfc)
            # 0) show all attrs in the order of attr_names
            for attr in self.attr_names:
                if attr in self.attrs:
                    s_attr = '%s="%s"' % (attr, self.attrs[attr])
                    lst.append(sep + s_attr)
            # 1) any attrs not in attr_names?  list them, too
            for attr in self.attrs:
                if attr not in self.attr_names:
                    s_attr = '%s="%s"' % (attr, self.attrs[attr])
                    lst.append(sep + s_attr)

        return "".join(lst)

    def _s_tag(self, tfc):
        """
        Return the whole element as a string, formatted as directed by tfc.

        An empty element gives an empty string unless tfc asks for
        everything to be printed.  An element without contents is written
        as a compact tag ("<name/>"); otherwise the contents go between a
        start tag and an end tag, on their own lines when
        multiline_contents() is true.
        """
        if not self and not tfc.b_print_all():
            return ""

        lst = []
        lst.append(tfc.s_indent() + "<" + self._s_start_tag_name_attrs(tfc))

        if not self.has_contents():
            lst.append("/>")
        else:
            lst.append(">")
            if self.multiline_contents():
                s = "\n%s\n" % self.s_contents(tfc.indent_by(1))
                lst.append(s + tfc.s_indent())
            else:
                lst.append(self.s_contents(tfc))
            lst.append("</" + self.tag_name + ">")

        return "".join(lst)

    def s_start_tag(self, tfc):
        return tfc.s_indent() + "<" + self._s_start_tag_name_attrs(tfc) + ">"

    def s_end_tag(self):
        return "</" + self.tag_name + ">"

    def s_compact_tag(self, tfc):
        return tfc.s_indent() + "<" + self._s_start_tag_name_attrs(tfc) + "/>"

    def is_element(self):
        return True
|
|
|
|
|
|
|
|
class TextElement(CoreElement):
    """
    An element that holds a text string and cannot have other elements
    nested inside it.

    Attributes:
        attr
        text
    """
    def __init__(self, tag_name, def_attr, def_attr_value, attr_names = []):
        CoreElement.__init__(
            self, tag_name, def_attr, def_attr_value, attr_names)
        # Unlock just long enough to create the .text member.
        self.lock = False
        self.text = ""
        self.lock = True

    def text_check(self):
        # Text is always allowed here, so there is nothing to object to.
        pass

    def has_contents(self):
        return bool(self.text)

    def multiline_contents(self):
        return "\n" in self.text

    def s_contents(self, tfc):
        # The text is returned as is; tfc plays no part.
        return self.text
|
|
|
|
|
|
|
|
class Nest(ElementItem):
    """
    A data structure that can store Elements, nested inside it.

    Note: this is not, itself, an Element!  Because it is not an XML
    element, it has no tags.  Its string representation is the
    representations of the elements nested inside it.

    NestElement and XMLDoc inherit from this.

    Attributes:
        elements    list of the XMLItems nested inside, in the order
                    they were added
    """
    def __init__(self):
        self.lock = False
        self._parent = None
        self._name = ""
        self.elements = []
        self.lock = True

    def __len__(self):
        return len(self.elements)

    def __getitem__(self, key):
        return self.elements[key]

    def __setitem__(self, key, value):
        self.elements[key] = value

    def __delitem__(self, key):
        del self.elements[key]

    def _do_setattr(self, name, value):
        """
        Bind value to the member called name.

        If value is an XMLItem it is also added to the tree: its _parent
        and _name are set and it is appended to self.elements.
        """
        if isinstance(value, XMLItem):
            value._parent = self
            value._name = name
            self.elements.append(value)
        self.__dict__[name] = value

    def _replace_child(self, name, value):
        """
        Bind the XMLItem value to a member that already exists.

        The new item takes the place of the old one in self.elements, so
        that the printed output follows the assignment; it is appended if
        the old value is not in the list.
        """
        old = self.__dict__[name]
        value._parent = self
        value._name = name
        # Compare by identity: the slot wanted is the one holding this
        # very object.
        for i, element in enumerate(self.elements):
            if element is old:
                self.elements[i] = value
                break
        else:
            self.elements.append(value)
        self.__dict__[name] = value

    def __setattr__(self, name, value):
        # Lots of magic here!  This is important stuff.  Here's how it works:
        #
        # 0) self.lock is a boolean, set to False initially and then set
        # to True at the end of __init__().  When it's False, you can add new
        # members to the class instance without any sort of checks; once
        # it's set True, __setattr__() starts checking assignments.  By
        # default, when lock is True, any assignment to an old member
        # has to be of matching type.  You can add a new member to the
        # class instance, but __setattr__() checks to ensure that the
        # new member is an XMLItem.
        #
        # 1) When a new member is an XMLItem, this will properly add the
        # XMLItem into the tree structure.  The XMLItem will have
        # _parent set to the parent, will have _name set to its name in
        # the parent, and will be added to the parent's elements list.
        # This is handled by _do_setattr().  Assigning an XMLItem over an
        # existing member of the same type swaps it into the old item's
        # slot in the elements list; see _replace_child().
        #
        # 2) As a convenience for the user, if the user is assigning a
        # string, and the member is an element that has a .text value,
        # this will assign the string to the .text value.  This allows
        # usages like "e.title = string", which is very nice.  Likewise
        # the user can assign a time value directly into Timestamp
        # elements.
        #
        # 3) This checks assignments to _parent, and makes sure they are
        # plausible (either an XMLItem, or None).

        try:
            lock = self.lock
        except AttributeError:
            lock = False

        if not lock:
            self._do_setattr(name, value)
            return

        members = self.__dict__
        if name not in members:
            # brand-new item
            self.nest_check()
            if not isinstance(value, XMLItem):
                raise TypeError("only XMLItem is permitted")
            self._do_setattr(name, value)
            return

        if name == "_parent" or name == "root_element":
            if not (isinstance(value, XMLItem) or value is None):
                raise TypeError("only XMLItem or None is permitted")
            members[name] = value
            return

        if name == "_name" and type(value) == type(""):
            members[name] = value
            return

        current = members[name]

        # for Timestamp elements, allow this:  element = time
        # (where "time" is a float value)
        # Also allow valid timestamp strings.
        if isinstance(current, Timestamp):
            if type(value) == type(1.0):
                current.time = value
                return
            elif type(value) == type(""):
                t = utc_time_from_s_timestamp(value)
                if not t:
                    raise ValueError("value must be a valid timestamp string")
                current.time = t
                return

        # Allow string assignment to go to the .text attribute, for
        # elements that allow it.  All TextElements allow it;
        # Elements will allow it if they do not have nested elements.
        # text_check() raises an error if it's not allowed.
        if isinstance(current, CoreElement) and type(value) == type(""):
            current.text_check()
            current.text = value
            return

        # locked item so do checks
        if type(current) is not type(value):
            raise TypeError("value is not the same type")

        if isinstance(value, XMLItem):
            # Keep the elements list in step with the member.
            self._replace_child(name, value)
            return

        members[name] = value

    def __delattr__(self, name):
        """
        Remove a nested XMLItem, both as a member and from self.elements.

        Raises TypeError if name is not a member holding an XMLItem that
        is nested inside this item.
        """
        # This won't be used often, if ever, but if anyone tries it, it
        # should work.
        o = self.__dict__.get(name)
        if not isinstance(o, XMLItem):
            # REVIEW: what error should this raise?
            raise TypeError("cannot delete that item")

        for i, element in enumerate(self.elements):
            if element is o:
                del self.elements[i]
                break
        else:
            # An XMLItem member that is not nested here, such as _parent.
            raise TypeError("cannot delete that item")

        del self.__dict__[name]

    def nest_check(self):
        # Nesting is always allowed here.
        pass

    def is_element(self):
        # a Nest is not really an element
        return False

    def has_contents(self):
        """Return True if at least one nested element is non-empty."""
        for element in self.elements:
            if element:
                return True
        # empty iff all of the elements were empty
        return False

    def __nonzero__(self):
        return self.has_contents()

    def multiline_contents(self):
        # if there are any contents, we want multiline for nested tags
        return self.has_contents()

    def s_contents(self, tfc):
        """
        Return the nested elements as strings, one after another.

        Each element is asked for its tag string; empty strings are left
        out and the rest are joined with newlines.  With no elements the
        result is an empty string.
        """
        lst = []
        for element in self.elements:
            s = element._s_tag(tfc)
            if s:
                lst.append(s)
        return "\n".join(lst)

    def s_tree(self):
        """
        Return a debugging tree: one line for this item, followed by the
        s_tree() output of each nested element.
        """
        level = self.level()
        tup = (level, self.s_name(), self.__class__.__name__)
        lst = ["%2d) %s (instance of %s)" % tup]
        for element in self.elements:
            lst.append(element.s_tree())
        return "\n".join(lst)

    def _s_tag(self, tfc):
        # A Nest has no tags of its own; it is just its contents.
        return self.s_contents(tfc)
|
|
|
|
|
|
|
|
|
|
class NestElement(Nest,CoreElement):
    """
    An element that can have other elements nested inside it.

    It combines the storage of a Nest with the tags and attributes of a
    CoreElement.

    Attributes:
        attr
        elements: a list of other elements nested inside this one.
    """
    def __init__(self, tag_name, def_attr, def_attr_value, attr_names=[]):
        CoreElement.__init__(
            self, tag_name, def_attr, def_attr_value, attr_names)
        # Unlock just long enough to create the .elements member.
        self.lock = False
        self.elements = []
        self.lock = True

    def is_element(self):
        # Unlike a plain Nest, this really is an element.
        return True

    def __nonzero__(self):
        # Use the element rule (attributes or contents), not the Nest rule.
        return CoreElement.__nonzero__(self)

    def _s_tag(self, tfc):
        # Print with start and end tags, as any element does.
        return CoreElement._s_tag(self, tfc)
|
|
|
|
|
|
|
|
class Element(NestElement,TextElement):
    """
    A class to represent an arbitrary XML tag.  Can either have other XML
    elements nested inside it, or else can have a text string value, but
    never both at the same time.

    This is intended for user-defined XML tags.  The user can just use
    "Element" for all custom tags.

    PyAtom doesn't use this; PyAtom uses TextElement for tags with a text
    string value, and NestElement for tags that nest other elements.  Users
    can do the same, or can just use Element, as they like.

    Attributes:
        attr
        elements:   a list of other elements nested inside, if any
        text:       a text string value, if any

    Note: nest_check() refuses to nest an element while text is set, and
    text_check() refuses text while elements are nested; both raise
    TypeError.
    """
    # An Element can have other elements nested inside it, or it can have
    # a single ".text" string value.  But never both at the same time.
    # Once you nest another element, you can no longer use the .text.
    def __init__(self, tag_name, def_attr, def_attr_value, attr_names=[]):
        NestElement.__init__(self, tag_name, def_attr, def_attr_value,
                attr_names)
        # Unlock just long enough to create the .text member.
        self.lock = False
        self.text = ""
        self.lock = True

    def nest_check(self):
        """Raise TypeError if this Element already has text contents."""
        if self.text:
            raise TypeError("Element has text contents so cannot nest")

    def text_check(self):
        """Raise TypeError if this Element already has nested elements."""
        if len(self.elements) > 0:
            raise TypeError("Element has nested elements so cannot assign text")

    def has_contents(self):
        return NestElement.has_contents(self) or TextElement.has_contents(self)

    def multiline_contents(self):
        return NestElement.has_contents(self) or self.text.find("\n") >= 0

    def s_contents(self, tfc):
        """
        Return the contents: the nested elements if there are any,
        otherwise the text, otherwise an empty string.
        """
        if len(self.elements) > 0:
            return NestElement.s_contents(self, tfc)
        if self.text:
            return TextElement.s_contents(self, tfc)
        return ""

    def s_tree(self):
        """
        Return a debugging tree for this Element.

        With nested elements: a header line and then each child's tree.
        With text: the one-line form from XMLItem.s_tree().
        With neither: a line saying the Element is empty.
        """
        if len(self.elements) > 0:
            level = self.level()
            tup = (level, self.s_name(), self.__class__.__name__)
            lst = ["%2d) %s (instance of %s)" % tup]
            for element in self.elements:
                lst.append(element.s_tree())
            return "\n".join(lst)

        if self.text:
            return XMLItem.s_tree(self)

        level = self.level()
        return "%2d) %s %s" % (level, self.s_name(), "empty Element...")
|
|
|
|
|
|
|
|
class Collection(XMLItem):
    """
    A Collection contains 0 or more Elements, but isn't an XML element.
    Use where a run of 0 or more Elements of the same type is legal.

    When you init your Collection, you specify what class of Element it will
    contain.  Attempts to append an Element of a different class will raise
    an exception.  Note, however, that the various Element classes all
    inherit from base classes, and you can specify a class from higher up in
    the inheritance tree.  You could, if you wanted, make a Collection
    containing "XMLItem" and then any item defined in PyAtom would be legal
    in that collection.  (See XMLDoc, which contains two collections of
    DocItem.)

    Attributes:
        contains:   the class of element this Collection will contain
        elements:   a list of other elements nested inside, if any

    Note: The string representation of a Collection is just the string
    representations of the elements inside it.  However, a verbose string
    representation also has an XML comment like this:

    <!-- collection of <class> with <n> elements -->

    where <n> is the number of elements in the Collection and <class> is the
    name of the class in this Collection.
    """
    def __init__(self, element_class):
        """
        element_class: the class every element of this Collection must
        be an instance of.
        """
        self.lock = False
        self._parent = None
        self._name = ""
        self.elements = []
        self.contains = element_class
        self.lock = True

    def __len__(self):
        return len(self.elements)

    def __getitem__(self, key):
        return self.elements[key]

    def __setitem__(self, key, value):
        """
        Put value in the slot given by key.

        Raises TypeError if value is not an instance of self.contains.
        """
        if not isinstance(value, self.contains):
            raise TypeError("object is the wrong type for this collection")
        # Same bookkeeping as append(): the element must know its parent.
        value._parent = self
        self.elements[key] = value

    def __delitem__(self, key):
        del self.elements[key]

    def __nonzero__(self):
        # there are no attrs so if any element is nonzero, collection is too
        for element in self.elements:
            if element:
                return True
        return False

    def is_element(self):
        # A Collection is not really an Element
        return False

    def s_coll(self):
        """Return a description such as "collection of Entry with 2 elements"."""
        name = self.contains.__name__
        n = len(self.elements)
        if n == 1:
            el = "element"
        else:
            el = "elements"
        return "collection of %s with %d %s" % (name, n, el)

    def append(self, element):
        """
        Add element to the end of the Collection and make the Collection
        its parent.

        Raises TypeError if element is not an instance of self.contains;
        a line describing the mismatch is written to sys.stderr first.
        """
        if not isinstance(element, self.contains):
            names = (type(element).__name__, self.contains.__name__)
            sys.stderr.write(
                "Error: attempted to insert %s into collection of %s\n"
                % names)
            raise TypeError("object is the wrong type for this collection")
        element._parent = self
        self.elements.append(element)

    def _s_tag(self, tfc):
        """
        Return the elements as strings, formatted as directed by tfc.
        """
        # A collection exists only as a place to put real elements.
        # There are no start or end tags...
        # In verbose mode, we do put an XML comment, and the elements
        # are indented one level further.

        if not self.elements and not tfc.b_print_all():
            return ""

        lst = []

        if tfc.b_print_verbose():
            s = "%s%s%s%s" % (tfc.s_indent(), "<!-- ", self.s_coll(), " -->")
            lst.append(s)
            tfc = tfc.indent_by(1)

        for element in self.elements:
            s = element._s_tag(tfc)
            if s:
                lst.append(s)

        return "\n".join(lst)

    def s_tree(self):
        """
        Return a debugging tree: one line describing the Collection,
        followed by the s_tree() output of each element.
        """
        level = self.level()
        lst = ["%2d) %s %s" % (level, self.s_name(), self.s_coll())]
        for element in self.elements:
            lst.append(element.s_tree())
        return "\n".join(lst)
|
|
|
|
|
|
|
|
class XMLDeclaration(XMLItem):
    """
    The XML declaration item, printed as <?xml ... ?>.

    Attributes:
        attrs       dictionary of attribute names and their values;
                    starts out with version "1.0" and encoding "utf-8"
        attr_names  attribute names, in the order they should be printed
    """
    # REVIEW: should this print multi-line for multiple attrs?
    def __init__(self):
        self._parent = None
        self._name = ""
        self.attrs = {s_version: "1.0", s_encoding: "utf-8"}
        self.attr_names = [s_version, s_encoding, s_standalone]

    def _s_tag(self, tfc):
        """
        Return the declaration on one line, indented as directed by tfc.

        Attributes named in attr_names come first, in that order; any
        other attributes follow.
        """
        # An XMLDeclaration() instance is never empty, so always prints.
        listed = [attr for attr in self.attr_names if attr in self.attrs]
        unlisted = [attr for attr in self.attrs
                if attr not in self.attr_names]

        pieces = ["%s%s" % (tfc.s_indent(), "<?xml")]
        for attr in listed + unlisted:
            pieces.append(' %s="%s"' % (attr, self.attrs[attr]))
        pieces.append("?>")

        return "".join(pieces)

    def __nonzero__(self):
        # Returns True because the XML Declaration is never empty.
        return True

    def is_element(self):
        return True
|
|
|
|
|
|
|
|
class XMLDoc(Nest):
    """
    A data structure to represent an XML Document.  It will have the
    following structure:

    the XML Declaration item
    0 or more document-level XML items
    exactly one XML item (the "root tag")
    0 or more document-level XML items

    Document-level XML items are: Comment, PI, MarkupDecl

    Attributes:
        xml_decl: the XMLDeclaration item
        docitems_above: a collection of DocItem (items above root_element)
        root_element: the XML tag containing your data
        docitems_below: a collection of DocItem (items below root_element)

    Note: usually the root_element has lots of other XML items nested
    inside it!
    """
    def __init__(self, root_element=None):
        """
        Build the document skeleton.

        root_element is the element to use as the root tag.  When it is
        omitted (None) a placeholder Comment is installed until a real
        root is assigned.
        """
        Nest.__init__(self)

        self._name = "XMLDoc"

        self.xml_decl = XMLDeclaration()
        self.docitems_above = Collection(DocItem)

        # Test for None, not truthiness: an element the caller supplied
        # must be kept even if it currently evaluates as false.
        if root_element is None:
            root_element = Comment("no root element yet")
        self.root_element = root_element

        self.docitems_below = Collection(DocItem)

    def __setattr__(self, name, value):
        """
        Assign an attribute, with special handling for root_element.

        root_element may always be set to any ElementItem; anything else
        raises TypeError.  Every other name falls through to
        Nest.__setattr__().
        """
        if name != "root_element":
            # for all other, fall through to inherited behavior
            Nest.__setattr__(self, name, value)
            return

        if not isinstance(value, ElementItem):
            raise TypeError("only ElementItem is permitted")

        self.lock = False
        try:
            # Item checks out, so assign it.  root_element should only
            # ever have one element, and we always put the new element
            # in the same slot in elements[].
            if "i_root_element" in self.__dict__:
                # Assign new root_element over old one in elements[]
                assert self.elements[self.i_root_element] is self.root_element
                self.elements[self.i_root_element] = value
            else:
                # This is the first time root_element was ever set.
                self.i_root_element = len(self.elements)
                self.elements.append(value)

            value._parent = self
            value._name = name
            self.__dict__[name] = value
        finally:
            # Re-lock even if the swap failed part-way.
            self.lock = True

    def Validate(self):
        """Check the document's invariants; return True when they hold."""
        # XMLDoc never has parent.  Never change this!
        assert self._parent is None
        return True
|
|
|
|
|
|
|
|
def local_time_from_utc_time(t):
    """
    Shift the time value t from UTC to local time.

    The shift is time.timezone seconds, i.e. the zone's non-DST offset;
    daylight saving time is not taken into account.
    """
    shift = time.timezone
    return t - shift
|
|
|
|
def utc_time_from_local_time(t):
    """
    Shift the time value t from local time to UTC.

    The shift is time.timezone seconds, i.e. the zone's non-DST offset;
    daylight saving time is not taken into account.
    """
    shift = time.timezone
    return t + shift
|
|
|
|
def local_time():
    """
    Return the current time, shifted from UTC by time.timezone seconds.

    Daylight saving time is not taken into account.
    """
    now = time.time()
    return now - time.timezone
|
|
|
|
def utc_time():
    """Return the current time as reported by time.time()."""
    now = time.time()
    return now
|
|
|
|
|
|
class TimeSeq(object):
    """
    Generator of timestamps that are each one second apart.

    Atom feed validators complain if multiple timestamps have the same
    value; taking every timestamp from one TimeSeq guarantees that they
    all differ.
    """
    def __init__(self, init_time=0):
        """
        Start the sequence at init_time; 0 (the default) means "start
        at local_time()".
        """
        if init_time != 0:
            self.time = float(init_time)
        else:
            self.time = local_time()

    def next(self):
        """Return the current timestamp and advance by one second."""
        current = self.time
        self.time = current + 1.0
        return current
|
|
|
|
# time.strftime() layout for the date-time part of an RFC3339 stamp.
# The UTC-offset suffix is not part of it; s_timestamp() appends that.
format_RFC3339 = "%Y-%m-%dT%H:%M:%S"
|
|
|
|
def parse_time_offset(s):
    """
    Convert an RFC3339 time-offset string into a signed number of seconds.

    s
        "Z", "z" or "" (all meaning an offset of 0), or a string
        containing a "+hh:mm" / "-hh:mm" offset.  Surrounding white
        space is ignored.

    Returns the offset in seconds: positive for "+", negative for "-".
    Raises ValueError if s contains no recognizable offset.
    """
    s = s.strip()

    if s in ("", "Z", "z"):
        return 0

    # search(), not match(): the offset may follow other text.
    m = pat_time_offset.search(s)
    if m is None:
        raise ValueError("not a valid time offset: %r" % (s,))

    sign, s_hour, s_min = m.groups()
    offset = int(s_hour) * 3600 + int(s_min) * 60
    if sign == "-":
        offset = -offset
    return offset
|
|
|
|
def s_timestamp(utc_time, time_offset="Z"):
    """
    Format a time and offset into a string.

    utc_time
        a floating-point value, time in the UTC time zone.  A false
        value (0, 0.0, None) means "no time" and yields "".
    time_offset
        a string specifying an offset from UTC.  Examples:
            z or Z -- offset is 0 ("Zulu" time, UTC, aka GMT)
            -08:00 -- 8 hours earlier than UTC (Pacific time zone)
            "" -- empty string is technically not legal, but may work
        An offset that parse_time_offset() cannot parse makes that call
        raise.

    Returns the formatted stamp with time_offset appended, or "" if the
    time cannot be converted or formatted.

    Notes:
        Returned string complies with RFC3339; uses ISO8601 date format.
        Example: 2003-12-13T18:30:02Z
        Example: 2003-12-13T18:30:02+02:00
    """
    if not utc_time:
        return ""

    utc_time += parse_time_offset(time_offset)

    # NOTE(review): the value is broken down with time.localtime(), which
    # applies this machine's own zone on top of time_offset.  It pairs
    # with the time.mktime() call in utc_time_from_s_timestamp(), so the
    # two must be changed together -- confirm the intent before touching.
    try:
        s = time.strftime(format_RFC3339, time.localtime(utc_time))
    except (ValueError, OverflowError, OSError):
        # Time value out of range for the platform.
        return ""

    return s + time_offset
|
|
|
|
|
|
|
|
# Matches an RFC3339 date-time stamp.  Groups 1-6 are year, month, day,
# hour, minute and second; group 7 is whatever follows the seconds
# (normally the UTC offset).  Raw strings keep "\d" from being read as a
# string escape.
pat_RFC3339 = re.compile(r"(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)(.*)")
# Matches a "+hh:mm" / "-hh:mm" offset.  Groups: sign, hours, minutes.
pat_time_offset = re.compile(r"([+-])(\d\d):(\d\d)")
|
|
|
|
def utc_time_from_s_timestamp(s_date_time_stamp):
    """
    Parse an RFC3339-compatible stamp (ISO8601 date format) into a time
    value; this is the inverse of s_timestamp().

    date time stamp example: 2003-12-13T18:30:02Z
    date time stamp example: 2003-12-13T18:30:02+02:00
    Leaving off the suffix is technically not legal, but allowed.

    Returns the time as a float, or 0.0 if the stamp cannot be parsed.
    """
    s_date_time_stamp = s_date_time_stamp.strip()

    try:
        m = pat_RFC3339.search(s_date_time_stamp)
        year, mon, mday, hour, minute, sec = [int(g) for g in m.groups()[:6]]
        tup = (year, mon, mday, hour, minute, sec, -1, -1, -1)
        t = time.mktime(tup)

        # The stamp shows the clock reading at the given offset, which
        # s_timestamp() produced by adding the offset; take it off again
        # to get back to the original time value.
        t -= parse_time_offset(m.group(7))
    except (AttributeError, ValueError, OverflowError):
        # AttributeError: the pattern did not match (m is None).
        # ValueError / OverflowError: date out of range, or bad offset.
        return 0.0

    return t
|
|
|
|
|
|
def s_time_offset():
    """
    Return a string with local offset from UTC in RFC3339 format.

    The result looks like "+hh:mm" or "-hh:mm".  It is derived from
    time.timezone, so daylight saving time is not taken into account.
    """
    # time.timezone counts seconds west of UTC, while an RFC3339 offset
    # is positive east of UTC, so the sign has to be flipped.
    offset = -time.timezone

    # A zero offset must be written "+00:00": RFC3339 reserves "-00:00"
    # for "UTC time known, local offset unknown".
    if offset >= 0:
        sign = "+"
    else:
        sign = "-"
        offset = -offset

    offset_hour, offset_min = divmod(offset // 60, 60)
    return "%s%02d:%02d" % (sign, offset_hour, offset_min)
|
|
|
|
# RFC3339 offset string for this machine's time zone, computed once when
# the module is loaded.
s_offset_local = s_time_offset()
|
|
|
|
# Offset given to each newly created Timestamp; starts out as the local
# offset and can be changed with set_default_time_offset().
s_offset_default = s_offset_local
|
|
|
|
def set_default_time_offset(s):
    """
    Make s the module-wide default offset, s_offset_default.

    s is stored as given; it is not validated here.
    """
    global s_offset_default
    s_offset_default = s
|
|
|
|
|
|
class Timestamp(CoreElement):
    """
    An element whose content is an RFC3339 timestamp.

    Attributes:
        time: the time as a number; 0 means "not set"
        time_offset: offset string used when formatting; taken from
            s_offset_default when the instance is created
        text: the formatted stamp.  Reading it formats time; assigning
            a stamp string to it parses the string into time.
    """
    def __init__(self, tag_name, time=0.0):
        CoreElement.__init__(self, tag_name, None, None)
        self.lock = False
        self.time = time
        self.time_offset = s_offset_default
        self.lock = True

    def __delattr__(self, name):
        CoreElement.__delattr__(self, name)

    def __getattr__(self, name):
        # Only reached when normal lookup fails; .text is never stored,
        # it is always derived from .time.
        if name == "text":
            return s_timestamp(self.time, self.time_offset)
        # Looked up by string so the name is not mangled by the class.
        inherited = getattr(CoreElement, "__getattr__", None)
        if inherited is None:
            raise AttributeError(name)
        return inherited(self, name)

    def __setattr__(self, name, value):
        """
        Assign an attribute; .text takes a timestamp string.

        Raises TypeError if .text is given a non-string, and ValueError
        if the string is not a valid timestamp.
        """
        if name != "text":
            CoreElement.__setattr__(self, name, value)
            return

        if not isinstance(value, str):
            raise TypeError("can only assign a string to .text")
        t = utc_time_from_s_timestamp(value)
        if not t:
            raise ValueError("value must be a valid timestamp string")
        self.time = t

    def has_contents(self):
        """True once a time has been set (time is not 0)."""
        return self.time != 0

    def multiline_contents(self):
        """A timestamp always fits on one line."""
        return False

    def s_contents(self, tfc):
        """Return the formatted timestamp; tfc is not used."""
        return s_timestamp(self.time, self.time_offset)

    def update(self):
        """Set the time to local_time() and return self."""
        # NOTE(review): this stores local_time(), not utc_time(), although
        # s_timestamp() documents its argument as UTC -- confirm intent.
        self.time = local_time()
        return self
|
|
|
|
|
|
|
|
|
|
# Below are all the classes to implement Atom using the above tools.
|
|
|
|
|
|
|
|
class AtomText(TextElement):
    """
    Base class for the Atom text elements that accept a "type" attribute.
    """
    def __init__(self, tag_name):
        # legal values of type: "text", "html", "xhtml"
        known_attrs = [ s_type ]
        TextElement.__init__(self, tag_name, None, None, known_attrs)
|
|
|
|
class Title(AtomText):
    """The Atom "title" element; text is its initial content."""
    def __init__(self, text=""):
        AtomText.__init__(self, "title")
        self.text = text
|
|
|
|
class Subtitle(AtomText):
    """The Atom "subtitle" element; text is its initial content."""
    def __init__(self, text=""):
        AtomText.__init__(self, "subtitle")
        self.text = text
|
|
|
|
class Content(AtomText):
    """The Atom "content" element; text is its initial content."""
    def __init__(self, text=""):
        AtomText.__init__(self, "content")
        self.text = text
|
|
|
|
class Summary(AtomText):
    """The Atom "summary" element; text is its initial content."""
    def __init__(self, text=""):
        AtomText.__init__(self, "summary")
        self.text = text
|
|
|
|
class Rights(AtomText):
    """The Atom "rights" element; text is its initial content."""
    def __init__(self, text=""):
        AtomText.__init__(self, "rights")
        self.text = text
|
|
|
|
class Id(TextElement):
    """The Atom "id" element; text is its initial content."""
    def __init__(self, text=""):
        TextElement.__init__(self, "id", None, None)
        self.text = text
|
|
|
|
class Generator(TextElement):
    """
    The Atom "generator" element, with known attributes "uri" and
    "version".
    """
    def __init__(self):
        TextElement.__init__(self, "generator", None, None,
                [ "uri", "version" ])
|
|
|
|
class Category(TextElement):
    """
    The Atom "category" element, with known attributes "term", "scheme"
    and "label".

    term_val is handed to TextElement together with the "term" attribute
    name -- presumably it becomes the initial value of that attribute;
    confirm against TextElement.
    """
    def __init__(self, term_val=""):
        known_attrs = [s_term, "scheme", "label"]
        TextElement.__init__(self, "category", s_term, term_val, known_attrs)
|
|
|
|
class Link(TextElement):
    """
    The Atom "link" element.

    href_val is the initial value of the "href" attribute; the other
    known attributes are listed in the constructor.
    """
    def __init__(self, href_val=""):
        known_attrs = [s_href]
        known_attrs += ["rel", "type", "hreflang", "title", "length"]
        known_attrs.append(s_lang)
        TextElement.__init__(self, "link", s_href, href_val, known_attrs)
|
|
|
|
class Icon(TextElement):
    """The Atom "icon" element; created without content."""
    def __init__(self):
        TextElement.__init__(self, "icon", None, None)
|
|
|
|
class Logo(TextElement):
    """The Atom "logo" element; created without content."""
    def __init__(self):
        TextElement.__init__(self, "logo", None, None)
|
|
|
|
class Name(TextElement):
    """The Atom "name" element; text is its initial content."""
    def __init__(self, text=""):
        TextElement.__init__(self, "name", None, None)
        self.text = text
|
|
|
|
class Email(TextElement):
    """The Atom "email" element; created without content."""
    def __init__(self):
        TextElement.__init__(self, "email", None, None)
|
|
|
|
class Uri(TextElement):
    """The Atom "uri" element; created without content."""
    def __init__(self):
        TextElement.__init__(self, "uri", None, None)
|
|
|
|
|
|
|
|
class BasicAuthor(NestElement):
    """
    Shared structure of the person elements: a nested element holding
    name, email and uri children, created in that order.

    tag_name is the XML tag to use; name is the person's name.
    """
    def __init__(self, tag_name, name):
        NestElement.__init__(self, tag_name, None, None)
        # Child order is kept as is: name, then email, then uri.
        self.name = Name(name)
        self.email = Email()
        self.uri = Uri()
|
|
|
|
class Author(BasicAuthor):
    """The Atom "author" element; name is the author's name."""
    def __init__(self, name=""):
        BasicAuthor.__init__(self, "author", name)
|
|
|
|
class Contributor(BasicAuthor):
    """The Atom "contributor" element; name is the contributor's name."""
    def __init__(self, name=""):
        BasicAuthor.__init__(self, "contributor", name)
|
|
|
|
|
|
|
|
class Updated(Timestamp):
    """The Atom "updated" timestamp; time 0.0 means "not set"."""
    def __init__(self, time=0.0):
        Timestamp.__init__(self, "updated", time)
|
|
|
|
class Published(Timestamp):
    """The Atom "published" timestamp; time 0.0 means "not set"."""
    def __init__(self, time=0.0):
        Timestamp.__init__(self, "published", time)
|
|
|
|
|
|
|
|
class FeedElement(NestElement):
    """
    Children shared by the feed-level elements (see Feed and Source).

    tag_name is the XML tag to use.  Every child starts out empty; the
    children are created in the order listed below.
    """
    def __init__(self, tag_name):
        NestElement.__init__(self, tag_name, None, None)

        # First group: title, id, updated, authors, links.
        self.title = Title("")
        self.id = Id("")
        self.updated = Updated()
        self.authors = Collection(Author)
        self.links = Collection(Link)

        # Second group: the remaining feed metadata.
        self.subtitle = Subtitle("")
        self.categories = Collection(Category)
        self.contributors = Collection(Contributor)
        self.generator = Generator()
        self.icon = Icon()
        self.logo = Logo()
        self.rights = Rights("")
|
|
|
|
class Feed(FeedElement):
    """
    The Atom "feed" element.

    Carries the Atom xmlns attribute, placeholder title and id texts
    that the caller is expected to replace, and an entries collection.
    """
    def __init__(self):
        FeedElement.__init__(self, "feed")
        self.attrs["xmlns"] = "http://www.w3.org/2005/Atom"
        # Placeholders, meant to be overwritten by the caller.
        self.title.text = "Title of Feed Goes Here"
        self.id.text = "ID of Feed Goes Here"
        self.entries = Collection(Entry)
|
|
|
|
class Source(FeedElement):
    """The Atom "source" element: feed-level children, no entries."""
    def __init__(self):
        FeedElement.__init__(self, "source")
|
|
|
|
|
|
|
|
class Entry(NestElement):
    """
    The Atom "entry" element.

    Title and id start with placeholder texts that the caller is
    expected to replace; every other child starts out empty.  The
    children are created in the order listed below.
    """
    def __init__(self):
        NestElement.__init__(self, "entry", None, None)

        # First group: title, id, updated, authors, links.
        self.title = Title("Title of Entry Goes Here")
        self.id = Id("ID of Entry Goes Here")
        self.updated = Updated()
        self.authors = Collection(Author)
        self.links = Collection(Link)

        # Second group: body text and the remaining metadata.
        self.content = Content("")
        self.summary = Summary("")
        self.categories = Collection(Category)
        self.contributors = Collection(Contributor)
        self.published = Published()
        self.source = Source()
        self.rights = Rights("")
|
|
|
|
|
|
|
|
def diff(s0, name0, s1, name1):
    """
    Return a line-by-line difflib.ndiff() report comparing s0 with s1.

    Both strings are split on "\\n" before comparing.  name0 and name1
    are accepted but not used in the report.
    """
    import difflib
    before = s0.split("\n")
    after = s1.split("\n")
    return "\n".join(difflib.ndiff(before, after))
|
|
|
|
|
|
# Self-test for the module: checks the two timestamp round trips, rebuilds
# the sample Atom feed and a small custom element tree, prints a report for
# each mismatch, and ends by printing whether the whole run passed.
def run_test_cases():
|
|
|
|
# The default is to make time stamps in local time with appropriate
|
|
# offset; for our tests, we want a default "Z" offset instead.
|
|
set_default_time_offset("Z")
|
|
|
|
failed_tests = 0
|
|
|
|
|
|
# Test: convert current time into a timestamp string and back
|
|
|
|
now = local_time()
|
|
# timestamp format does not allow fractional seconds
|
|
now = float(int(now)) # truncate any fractional seconds
|
|
s = s_timestamp(now)
|
|
t = utc_time_from_s_timestamp(s)
|
|
if now != t:
|
|
failed_tests += 1
|
|
print "test case failed:"
|
|
print now, "-- original timestamp"
|
|
print t, "-- converted timestamp does not match"
|
|
|
|
|
|
# Test: convert a timestamp string to a time value and back
|
|
|
|
s_time = "2003-12-13T18:30:02Z"
|
|
t = utc_time_from_s_timestamp(s_time)
|
|
s = s_timestamp(t)
|
|
if s_time != s:
|
|
failed_tests += 1
|
|
print "test case failed:"
|
|
print s_time, "-- original timestamp"
|
|
print s, "-- converted timestamp does not match"
|
|
|
|
|
|
# Test: generate the "Atom-Powered Robots Run Amok" example
|
|
#
|
|
# Note: the original had some of the XML declarations in
|
|
# a different order than PyAtom puts them. I swapped around
|
|
# the lines here so they would match the PyAtom order. Other
|
|
# than that, this is the example from:
|
|
#
|
|
# http://www.atomenabled.org/developers/syndication/#sampleFeed
|
|
|
|
s_example = """\
|
|
<?xml version="1.0" encoding="utf-8"?>
|
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
|
<title>Example Feed</title>
|
|
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
|
|
<updated>2003-12-13T18:30:02Z</updated>
|
|
<author>
|
|
<name>John Doe</name>
|
|
</author>
|
|
<link href="http://example.org/"/>
|
|
<entry>
|
|
<title>Atom-Powered Robots Run Amok</title>
|
|
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
|
<updated>2003-12-13T18:30:02Z</updated>
|
|
<link href="http://example.org/2003/12/13/atom03"/>
|
|
<summary>Some text.</summary>
|
|
</entry>
|
|
</feed>"""
|
|
|
|
xmldoc = XMLDoc()
|
|
|
|
feed = Feed()
|
|
xmldoc.root_element = feed
|
|
|
|
feed.title = "Example Feed"
|
|
feed.id = "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6"
|
|
feed.updated = "2003-12-13T18:30:02Z"
|
|
|
|
link = Link("http://example.org/")
|
|
feed.links.append(link)
|
|
|
|
author = Author("John Doe")
|
|
feed.authors.append(author)
|
|
|
|
|
|
entry = Entry()
|
|
feed.entries.append(entry)
|
|
entry.id = "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a"
|
|
entry.title = "Atom-Powered Robots Run Amok"
|
|
entry.updated = "2003-12-13T18:30:02Z"
|
|
entry.summary = "Some text."
|
|
|
|
link = Link("http://example.org/2003/12/13/atom03")
|
|
entry.links.append(link)
|
|
|
|
|
|
s = str(xmldoc)
|
|
if s_example != s:
|
|
failed_tests += 1
|
|
print "test case failed:"
|
|
print "The generated XML doesn't match the example. diff follows:"
|
|
print diff(s_example, "s_example", s, "s")
|
|
|
|
|
|
# Test: verify that xmldoc.Validate() succeeds
|
|
|
|
if not xmldoc.Validate():
|
|
failed_tests += 1
|
|
print "test case failed:"
|
|
print "xmldoc.Validate() failed."
|
|
|
|
|
|
# Test: does Element work both nested and non-nested?
|
|
s_test = """\
|
|
<test>
|
|
<test:agent number="007">James Bond</test:agent>
|
|
<test:pet
|
|
nickname="Mei-Mei"
|
|
type="cat">Matrix</test:pet>
|
|
</test>"""
|
|
|
|
class TestPet(Element):
|
|
def __init__(self, name=""):
|
|
Element.__init__(self, "test:pet", None, None)
|
|
self.text = name
|
|
|
|
class TestAgent(Element):
|
|
def __init__(self, name=""):
|
|
Element.__init__(self, "test:agent", None, None)
|
|
self.text = name
|
|
|
|
class Test(Element):
|
|
def __init__(self):
|
|
Element.__init__(self, "test", None, None)
|
|
self.test_agent = TestAgent()
|
|
self.test_pet = TestPet()
|
|
|
|
test = Test()
|
|
test.test_agent = "James Bond"
|
|
test.test_agent.attrs["number"] = "007"
|
|
test.test_pet = "Matrix"
|
|
test.test_pet.attrs["type"] = "cat"
|
|
test.test_pet.attrs["nickname"] = "Mei-Mei"
|
|
|
|
s = str(test)
|
|
if s_test != s:
|
|
failed_tests += 1
|
|
print "test case failed:"
|
|
print "test output doesn't match. diff follows:"
|
|
print diff(s_test, "s_test", s, "s")
|
|
|
|
|
|
if failed_tests > 0:
|
|
print "self-test failed!"
|
|
else:
|
|
print "self-test successful."
|
|
|
|
|
|
|
|
# Run the built-in self-test when this file is executed as a script.
if __name__ == "__main__":
    run_test_cases()
|