Replaced libxml2.parseDoc with htmlParseDoc for HTML files, because parseDoc

doesn't work for HTML entities like &amp;nbsp; and so on.

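This created another problem, because htmlParseDoc doesn't seem to handle
namespaces properly: doc.ns() doesn't work. Worked around (I hope) by assuming
the 'html' namespace for text/html documents and by using un-prefixed XPath
expressions on them.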
This commit is contained in:
eraviart 2004-02-02 22:43:22 +00:00
parent db428baca3
commit d7063d3448
6 changed files with 37 additions and 24 deletions

View File

@ -99,7 +99,7 @@ class Election(dataholders.Xml):
div.append(subjectDiv)
spipText = voteSubjects[votePageNode]
htmlText = "<spip>%s</spip>" % parsers.makeHtmlFromSpip(spipText)
doc = libxml2.parseDoc(htmlText)
doc = libxml2.htmlParseDoc(htmlText, 'UTF-8')
node = doc.getRootElement().children
while node is not None:
subjectDiv.append(node)

View File

@ -560,27 +560,35 @@ class Xml(DataHolder):
dataFile = file(absolutePath, "rb")
data = dataFile.read()
dataFile.close()
self.doc = libxml2.parseDoc(data)
if self.mimeType == 'text/html':
self.doc = libxml2.htmlParseDoc(data, 'UTF-8')
else:
self.doc = libxml2.parseDoc(data)
#self.context = self.doc.xpathNewContext()
node = self.node
namespaceUri = node.ns().content
namespaceNames = {
"http://abracadabra.entrouvert.org/0.0": None,
"http://www.w3.org/1999/xhtml": "html",
"http://www.w3.org/2002/xforms": "xforms",
"http://www.w3.org/2001/XMLSchema": "xsd",
}
if namespaceUri in namespaceNames:
namespaceName = namespaceNames[namespaceUri]
if self.mimeType == 'text/html':
# This instruction fails with htmlParseDoc:
# namespaceUri = node.ns().content
namespaceName = 'html'
else:
context.getVar("logger").debug(
"""Unknown XML namespace URI = "%s".""" % namespaceUri)
namespaceName = "???"
namespaceUri = node.ns().content
namespaceNames = {
'http://abracadabra.entrouvert.org/0.0': None,
'http://www.w3.org/1999/xhtml': 'html',
'http://www.w3.org/2002/xforms': 'xforms',
'http://www.w3.org/2001/XMLSchema': 'xsd',
}
if namespaceUri in namespaceNames:
namespaceName = namespaceNames[namespaceUri]
else:
context.getVar('logger').debug(
'Unknown XML namespace URI = "%s".' % namespaceUri)
namespaceName = "???"
if namespaceName is None:
name = node.name
else:
name = "%s:%s" % (namespaceName, node.name)
name = '%s:%s' % (namespaceName, node.name)
realClass = modules.getXmlClass(name)
if realClass != self.__class__:
@ -656,7 +664,10 @@ class Html(Xml):
def fillPageLayout(self, layout):
self.convertInternUris()
nodes = self.evaluateXpath("html:body/*")
# FIXME: When the document is parsed by htmlParseDoc, there is no
# namespace.
#nodes = self.evaluateXpath("html:body/*")
nodes = self.evaluateXpath("body/*")
if not nodes:
return False
filled = False
@ -664,7 +675,7 @@ class Html(Xml):
if node is None:
continue
filled = True
layout.append(node )
layout.append(node)
return True
def outputFullPage(self):

View File

@ -316,7 +316,7 @@ class Spip(Type):
def convertValueNodeToHtmlNodes(self, valueNode):
htmlText = "<spip>%s</spip>" % parsers.makeHtmlFromSpip(
valueNode.content)
doc = libxml2.parseDoc(htmlText)
doc = libxml2.htmlParseDoc(htmlText, 'UTF-8')
node = doc.getRootElement().children
result = []
while node is not None:

View File

@ -979,7 +979,7 @@ class Spip(Control):
content = self.node.content
if content:
htmlText = "<spip>%s</spip>" % parsers.makeHtmlFromSpip(content)
doc = libxml2.parseDoc(htmlText)
doc = libxml2.htmlParseDoc(htmlText, 'UTF-8')
node = doc.getRootElement().children
while node is not None:
layout.append(node)

View File

@ -1,7 +1,7 @@
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>À propos de Glasnost [0d]</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<link rel="stylesheet" href="/css/0d.css" />
@ -53,12 +53,12 @@
</dd>
</dl>
<hr />
<p>Copyright (c) 2000, 2001 <a class="reference" href="http://www.easter-eggs.com" title="www.easter-eggs.com">Easter-eggs</a> &amp; Emmanuel
<p>Copyright &copy; 2000, 2001 <a class="reference" href="http://www.easter-eggs.com" title="www.easter-eggs.com">Easter-eggs</a> &amp; Emmanuel
Raviart</p>
<p>Copyright (c) 2002 Odile Bénassy, <a class="reference" href="http://www.codelutin.com" title="www.codelutin.com">Code Lutin</a>,
<p>Copyright &copy; 2002 Odile Bénassy, <a class="reference" href="http://www.codelutin.com" title="www.codelutin.com">Code Lutin</a>,
Thierry Dulieu, <a class="reference" href="http://www.easter-eggs.com" title="www.easter-eggs.com">Easter-eggs</a>, <a class="reference" href="http://www.entrouvert.com" title="www.entrouvert.com">Entr'ouvert</a>, Frédéric Péters, Benjamin Poussin, Emmanuel
Raviart, Emmanuel Saracco &amp; <a class="reference" href="http://www.theridion.com" title="www.theridion.com">Théridion</a>.</p>
<p>Copyright (c) 2003 Odile Bénassy, Romain Chantereau, Nicolas Clapiès, <a class="reference" href="http://www.codelutin.com" title="www.codelutin.com">Code
<p>Copyright &copy; 2003 Odile Bénassy, Romain Chantereau, Nicolas Clapiès, <a class="reference" href="http://www.codelutin.com" title="www.codelutin.com">Code
Lutin</a>, Pierre-Antoine Dejace, Thierry Dulieu,
<a class="reference" href="http://www.easter-eggs.com" title="www.easter-eggs.com">Easter-eggs</a>, <a class="reference" href="http://www.entrouvert.com" title="www.entrouvert.com">Entr'ouvert</a>, Florent Monnier, Cédric Musso, <a class="reference" href="http://ouvaton.coop" title="ouvaton.coop">Ouvaton</a>, Frédéric Péters, Benjamin Poussin, Rodolphe
Quiédeville, Emmanuel Raviart, Sébastien Régnier, Emmanuel Saracco, <a class="reference" href="http://www.theridion.com" title="www.theridion.com">Théridion</a> &amp; <a class="reference" href="http://www.vecam.org" title="www.vecam.org">Vecam</a>.</p>
@ -95,4 +95,3 @@ la <a class="reference" href="/gpl" title="GNU General Public License">Licence P
</body>
</html>

View File

@ -8,6 +8,9 @@
<users mode="edit">
<everybody/>
</users>
<users mode="full-page">
<everybody/>
</users>
<users mode="new">
<everybody/>
</users>