Replaced libxml2.parseDoc with htmlParseDoc for HTML files, because parseDoc

doesn't work for HTML entities like &amp;nbsp; (i.e. the literal entity "&nbsp;") and so on. This created another problem, because htmlParseDoc doesn't seem to handle namespaces properly: doc.ns() doesn't work. Fixed (I hope so).
2004-02-02 22:43:22 +00:00 · 2004-02-02 22:43:22 +00:00 · d7063d3448
parent db428baca3
commit d7063d3448
6 changed files with 37 additions and 24 deletions
--- a/extra-modules/elections.py
+++ b/extra-modules/elections.py
@ -99,7 +99,7 @@ class Election(dataholders.Xml):
            div.append(subjectDiv)
            spipText = voteSubjects[votePageNode]
            htmlText = "<spip>%s</spip>" % parsers.makeHtmlFromSpip(spipText)
-            doc = libxml2.parseDoc(htmlText)
+            doc = libxml2.htmlParseDoc(htmlText, 'UTF-8')
            node = doc.getRootElement().children
            while node is not None:
                subjectDiv.append(node)
--- a/src/core/dataholders.py
+++ b/src/core/dataholders.py
@ -560,27 +560,35 @@ class Xml(DataHolder):
                    dataFile = file(absolutePath, "rb")
                    data = dataFile.read()
                    dataFile.close()
-            self.doc = libxml2.parseDoc(data)        
+            if self.mimeType == 'text/html':
+                self.doc = libxml2.htmlParseDoc(data, 'UTF-8')
+            else:
+                self.doc = libxml2.parseDoc(data)
            #self.context = self.doc.xpathNewContext()

        node = self.node
-        namespaceUri = node.ns().content
-        namespaceNames = {
-            "http://abracadabra.entrouvert.org/0.0": None,
-            "http://www.w3.org/1999/xhtml": "html",
-            "http://www.w3.org/2002/xforms": "xforms",
-            "http://www.w3.org/2001/XMLSchema": "xsd",
-            }
-        if namespaceUri in namespaceNames:
-            namespaceName = namespaceNames[namespaceUri]
+        if self.mimeType == 'text/html':
+            # This instruction fails with htmlParseDoc:
+            # namespaceUri = node.ns().content
+            namespaceName = 'html'
        else:
-            context.getVar("logger").debug(
-                """Unknown XML namespace URI = "%s".""" % namespaceUri)
-            namespaceName = "???"
+            namespaceUri = node.ns().content
+            namespaceNames = {
+                'http://abracadabra.entrouvert.org/0.0': None,
+                'http://www.w3.org/1999/xhtml': 'html',
+                'http://www.w3.org/2002/xforms': 'xforms',
+                'http://www.w3.org/2001/XMLSchema': 'xsd',
+                }
+            if namespaceUri in namespaceNames:
+                namespaceName = namespaceNames[namespaceUri]
+            else:
+                context.getVar('logger').debug(
+                    'Unknown XML namespace URI = "%s".' % namespaceUri)
+                namespaceName = "???"
        if namespaceName is None:
            name = node.name
        else:
-            name = "%s:%s" % (namespaceName, node.name)
+            name = '%s:%s' % (namespaceName, node.name)

        realClass = modules.getXmlClass(name)
        if realClass != self.__class__:
@ -656,7 +664,10 @@ class Html(Xml):

    def fillPageLayout(self, layout):
        self.convertInternUris()
-        nodes = self.evaluateXpath("html:body/*")
+        # FIXME: When the document is parsed by htmlParseDoc, there is no
+        # namespace.
+        #nodes = self.evaluateXpath("html:body/*")
+        nodes = self.evaluateXpath("body/*")
        if not nodes:
            return False
        filled = False
@ -664,7 +675,7 @@ class Html(Xml):
            if node is None:
                continue
            filled = True
-            layout.append(node )
+            layout.append(node)
        return True

    def outputFullPage(self):
--- a/src/core/xmlschemas.py
+++ b/src/core/xmlschemas.py
@ -316,7 +316,7 @@ class Spip(Type):
    def convertValueNodeToHtmlNodes(self, valueNode):
        htmlText = "<spip>%s</spip>" % parsers.makeHtmlFromSpip(
            valueNode.content)
-        doc = libxml2.parseDoc(htmlText)
+        doc = libxml2.htmlParseDoc(htmlText, 'UTF-8')
        node = doc.getRootElement().children
        result = []
        while node is not None:
--- a/src/modules/xforms/descriptions.py
+++ b/src/modules/xforms/descriptions.py
@ -979,7 +979,7 @@ class Spip(Control):
        content = self.node.content
        if content:
            htmlText = "<spip>%s</spip>" % parsers.makeHtmlFromSpip(content)
-            doc = libxml2.parseDoc(htmlText)
+            doc = libxml2.htmlParseDoc(htmlText, 'UTF-8')
            node = doc.getRootElement().children
            while node is not None:
                layout.append(node)
--- a/vhosts/abracadabra/about.html
+++ b/vhosts/abracadabra/about.html
@ -1,7 +1,7 @@
 <html xmlns="http://www.w3.org/1999/xhtml">
 	<head>
 		<title>À propos de Glasnost [0d]</title>
-		<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15" />
+		<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />

 		<link rel="stylesheet" href="/css/0d.css" />
 		
@ -53,12 +53,12 @@
 </dd>
 </dl>
 <hr />
-<p>Copyright (c)  2000, 2001 <a class="reference" href="http://www.easter-eggs.com" title="www.easter-eggs.com">Easter-eggs</a> &amp; Emmanuel
+<p>Copyright &copy;  2000, 2001 <a class="reference" href="http://www.easter-eggs.com" title="www.easter-eggs.com">Easter-eggs</a> &amp; Emmanuel
 Raviart</p>
-<p>Copyright (c)  2002 Odile Bénassy, <a class="reference" href="http://www.codelutin.com" title="www.codelutin.com">Code Lutin</a>,
+<p>Copyright &copy;  2002 Odile Bénassy, <a class="reference" href="http://www.codelutin.com" title="www.codelutin.com">Code Lutin</a>,
 Thierry Dulieu, <a class="reference" href="http://www.easter-eggs.com" title="www.easter-eggs.com">Easter-eggs</a>, <a class="reference" href="http://www.entrouvert.com" title="www.entrouvert.com">Entr'ouvert</a>, Frédéric Péters, Benjamin Poussin, Emmanuel
 Raviart, Emmanuel Saracco &amp; <a class="reference" href="http://www.theridion.com" title="www.theridion.com">Théridion</a>.</p>
-<p>Copyright (c)  2003 Odile Bénassy, Romain Chantereau, Nicolas Clapiès, <a class="reference" href="http://www.codelutin.com" title="www.codelutin.com">Code
+<p>Copyright &copy;  2003 Odile Bénassy, Romain Chantereau, Nicolas Clapiès, <a class="reference" href="http://www.codelutin.com" title="www.codelutin.com">Code
 Lutin</a>, Pierre-Antoine Dejace, Thierry Dulieu,
 <a class="reference" href="http://www.easter-eggs.com" title="www.easter-eggs.com">Easter-eggs</a>, <a class="reference" href="http://www.entrouvert.com" title="www.entrouvert.com">Entr'ouvert</a>, Florent Monnier, Cédric Musso, <a class="reference" href="http://ouvaton.coop" title="ouvaton.coop">Ouvaton</a>, Frédéric Péters, Benjamin Poussin, Rodolphe
 Quiédeville, Emmanuel Raviart, Sébastien Régnier, Emmanuel Saracco, <a class="reference" href="http://www.theridion.com" title="www.theridion.com">Théridion</a> &amp; <a class="reference" href="http://www.vecam.org" title="www.vecam.org">Vecam</a>.</p>
@ -95,4 +95,3 @@ la <a class="reference" href="/gpl" title="GNU General Public License">Licence P

 	</body>
 </html>
-
--- a/vhosts/abracadabra/index.xml
+++ b/vhosts/abracadabra/index.xml
@ -8,6 +8,9 @@
 <users mode="edit">
  <everybody/>
 </users>
+ <users mode="full-page">
+  <everybody/>
+ </users>
 <users mode="new">
  <everybody/>
 </users>