144 lines
4.3 KiB
Python
144 lines
4.3 KiB
Python
"""Various functions for dealing with HTML.
|
|
|
|
These functions are fairly simple but it is critical that they be
|
|
used correctly. Many security problems are caused by escaping errors
|
|
(cross site scripting is one example). The HTML and XML standards on
|
|
www.w3c.org and www.xml.com should be studied, especially the sections
|
|
on character sets, entities, attribute and values.
|
|
|
|
htmltext and htmlescape
|
|
-----------------------
|
|
|
|
This type and function are meant to be used with [html] PTL template type.
|
|
The htmltext type designates data that does not need to be escaped and the
|
|
htmlescape() function calls str() on the argument, escapes the resulting
|
|
string and returns a htmltext instance. htmlescape() does nothing to
|
|
htmltext instances.
|
|
|
|
url_quote
|
|
---------
|
|
|
|
Use for quoting data to be included as part of a URL, for example:
|
|
|
|
input = "foo bar"
|
|
...
|
|
'<a href="/search?keyword=%s">' % url_quote(input)
|
|
|
|
Note that URLs are usually used as attribute values and might need to have
|
|
HTML special characters escaped. As an example of incorrect usage:
|
|
|
|
url = 'http://example.com/?a=1&copy=0' # INCORRECT
|
|
url = 'http://example.com/?a=1&amp;copy=0' # CORRECT
|
|
...
|
|
'<a href="%s">do something</a>' % url
|
|
|
|
Old browsers would treat "&copy" as an entity reference and replace it with
|
|
the copyright character. XML processors should treat it as an invalid entity
|
|
reference.
|
|
"""
|
|
|
|
import re
|
|
import urllib
|
|
|
|
try:
|
|
# faster C implementation
|
|
from quixote.html._c_htmltext import htmltext, htmlescape, \
|
|
stringify, TemplateIO
|
|
except ImportError:
|
|
from quixote.html._py_htmltext import htmltext, htmlescape, \
|
|
stringify, TemplateIO
|
|
|
|
# Sentinel for htmltag(): an attribute whose value is this object is rendered
# with its own name as the value (e.g. checked="checked").  It is compared by
# identity, so it can never collide with a real attribute value.
ValuelessAttr = object() # magic singleton object
|
|
|
|
def htmltag(tag, xml_end=False, css_class=None, **attrs):
    """Create a HTML tag.

    Builds the opening tag '<tag ...>' (or the self-closing '<tag ... />'
    when 'xml_end' is true) and returns it as htmltext.  Each keyword
    argument becomes an attribute whose value is HTML-escaped.  'css_class'
    supplies the 'class' attribute, which cannot be spelled as a Python
    keyword argument.  Attributes whose value is None are left out;
    attributes whose value is ValuelessAttr use their own name as value.
    """
    if css_class is not None:
        attrs['class'] = css_class

    pieces = ["<%s" % tag]
    for name, value in attrs.items():
        if value is None:
            # None means "do not emit this attribute at all"
            continue
        if value is ValuelessAttr:
            value = name
        pieces.append(' %s="%s"' % (name, stringify(htmlescape(value))))

    pieces.append(" />" if xml_end else ">")
    return htmltext("".join(pieces))
|
|
|
|
|
|
def href(url, text, title=None, **attrs):
    """Return an '<a>' element linking to 'url' as htmltext.

    'text' is escaped with htmlescape() and used as the link content.
    'title' and any extra keyword arguments are passed to htmltag() as
    attributes of the opening tag.
    """
    opening = htmltag("a", href=url, title=title, **attrs)
    return opening + htmlescape(text) + htmltext("</a>")
|
|
|
|
def url_with_query(path, **attrs):
    """Return 'path' followed by a query string built from 'attrs'.

    The path and every key and value are passed through url_quote().  The
    result starts out as htmltext and the query string is appended to it as
    a plain string.
    """
    url = htmltext(url_quote(path))
    if not attrs:
        return url

    pairs = [url_quote(key) + "=" + url_quote(value)
             for key, value in attrs.items()]
    # the query is a plain string appended to an htmltext
    url += "?" + "&".join(pairs)
    return url
|
|
|
|
def nl2br(value):
    """nl2br(value : any) -> htmltext

    Insert <br /> tags before newline characters.

    'value' is first passed through htmlescape(); a '<br />' tag is then
    inserted in front of every newline and the result is wrapped in
    htmltext again.
    """
    # Go through stringify(), as htmltag() does, rather than reading the
    # '.s' attribute of the escaped value: use_qpy() can rebind htmlescape
    # to an implementation whose result is not guaranteed to have '.s'.
    escaped = stringify(htmlescape(value))
    return htmltext(escaped.replace('\n', '<br />\n'))
|
|
|
|
|
|
def url_quote(value, fallback=None):
    """url_quote(value : any [, fallback : string]) -> string

    Quote 'value' for inclusion in a URL; see urllib.quote().  The value
    is converted with stringify() first.

    When 'value' is None the 'fallback' argument decides what happens: if
    a fallback was supplied it is returned as is (unquoted), otherwise
    ValueError is raised.
    """
    if value is not None:
        return urllib.quote(stringify(value))
    if fallback is not None:
        return fallback
    raise ValueError("value is None and no fallback supplied")
|
|
|
|
# The (htmltext, stringify, htmlescape, TemplateIO) bindings that were in
# effect before use_qpy() replaced them; cleanup_qpy() restores them from
# here.  None while nothing has been saved.
_saved = None
|
|
def use_qpy():
    """
    Switch to using 'qpy' as an alternative.

    Rebinds the module-level htmltext, stringify, htmlescape and
    TemplateIO names to their qpy counterparts.  The bindings that were
    active before the first call are kept in '_saved' so that
    cleanup_qpy() can restore them.
    """
    # imported here so that qpy is only required when it is actually used
    import qpy
    from qpy_templateio import qpy_TemplateIO

    global _saved, htmltext, stringify, htmlescape, TemplateIO

    if not _saved:
        # only the first call saves; a repeated call must not overwrite the
        # saved originals with the qpy implementations
        originals = (htmltext, stringify, htmlescape, TemplateIO)
        _saved = originals

    h8 = qpy.h8
    htmltext = h8
    stringify = qpy.stringify
    htmlescape = h8.quote
    TemplateIO = qpy_TemplateIO
|
|
|
|
def cleanup_qpy():
    """Undo use_qpy() by restoring the saved bindings.

    Rebinds the module-level htmltext, stringify, htmlescape and
    TemplateIO names to the values stored in '_saved' and resets '_saved'
    to None.  Does nothing when nothing has been saved (use_qpy() was never
    called, or cleanup_qpy() has already run) instead of failing while
    trying to unpack None.
    """
    global _saved, htmltext, stringify, htmlescape, TemplateIO

    if _saved is None:
        # nothing to restore; keep the current bindings untouched
        return

    (htmltext, stringify, htmlescape, TemplateIO) = _saved
    _saved = None
|
|
|
|
|
|
# Matches ETAGO, the two character sequence "</"; used by js_escape().
_ETAGO_PAT = re.compile(r'</')
|
|
|
|
def js_escape(s):
    """Escape Javascript code to be embedded in HTML.

    Inside a <script> element the ETAGO (the two character sequence "</")
    would end the element prematurely, so every occurrence is rewritten
    with a backslash between the two characters.  The argument is
    converted with stringify() and the result is returned as htmltext.
    """
    source = stringify(s)
    # The sequence is assumed to occur inside a Javascript string literal,
    # where a backslash escape in front of "/" is harmless.
    escaped = _ETAGO_PAT.sub(r'<\/', source)
    return htmltext(escaped)
|