unitex.py, to unite Mallard pages into a single hierarchical page

Frédéric Péters 2014-05-16 00:24:03 +02:00
parent 45675cf1a8
commit 258d2d04c2
1 changed file with 149 additions and 0 deletions

unitex.py Executable file

@@ -0,0 +1,149 @@
#! /usr/bin/env python
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
# This script unites Mallard pages into a single hierarchical page, to
# be processed to create a PDF document.
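#
# Typical invocation (paths are illustrative, not taken from this commit):
#   ./unitex.py -d help/C -l help/C/manual.tex
# which writes _united.page and _united.cache into the directory and the
# generated LaTeX document to the path given with -l.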

import xml.etree.ElementTree as ET
import subprocess
from optparse import OptionParser
import os
import sys
import tempfile

# (xref target, Page, sort title) tuples collected from every page's guide link
xrefs = []

class Page:
    # one Mallard page, loaded from an entry in the yelp-build cache
    id = None
    filepath = None
    xml_node = None
    xref = None
    done = False  # set once the page has been attached to a parent page

    def __init__(self, node):
        self.id = node.attrib.get('id')
        self.filepath = node.attrib.get('{http://projectmallard.org/cache/1.0/}href')
        # the cached href is a file:// URL; strip the scheme to get a local path
        self.xml_node = ET.parse(self.filepath[7:])
        self.load_xrefs()

    def load_xrefs(self):
        global xrefs
        # a page declares its parent with <link type="guide" xref="..."/>
        for node in self.xml_node.findall(".//{http://projectmallard.org/1.0/}link[@type='guide']"):
            self.xref = node.attrib.get('xref')
            break
        if self.xref:
            # order under the parent by the sort title if there is one,
            # falling back to the first title
            sort_title = self.xml_node.findall(".//{http://projectmallard.org/1.0/}title")[0].text
            for node in self.xml_node.findall(".//{http://projectmallard.org/1.0/}title[@type='sort']"):
                sort_title = node.text
                break
            xrefs.append((self.xref, self, sort_title))
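            # e.g. a hypothetical page "backup" carrying <link type="guide"
            # xref="index#admin"/> and <title type="sort">Backups</title>
            # ends up here as ('index#admin', <backup Page>, 'Backups')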

    def complete(self):
        # attach every page whose guide xref points at this page, or at one
        # of its sections, as a new subsection
        for xref, page, sort_title in xrefs:
            if xref == self.id:
                new_parent_element = self.xml_node.getroot()
            elif xref.startswith(self.id + '#'):
                section_id = xref.split('#')[1]
                section = self.xml_node.findall(".//*[@id='%s']" % section_id)
                try:
                    new_parent_element = section[0]
                except IndexError:
                    print >> sys.stderr, 'W: missing section:', section_id
                    continue
            else:
                continue
            new_section = ET.SubElement(new_parent_element, '{http://projectmallard.org/1.0/}section')
            new_section.attrib['id'] = 'auto-' + page.id
            new_section.extend(page.xml_node.getroot().getchildren())
            page.done = True
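            # e.g. the hypothetical "backup" page above would become
            # <section id="auto-backup"> inside the element with id "admin"
            # on the index page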

def create_mallard_cache(directory):
    # build a Mallard cache of every page in the directory, written to stdout
    page_files = [os.path.join(directory, x) for x in os.listdir(directory)
                  if x.endswith('.page') and x != '_united.page']
    cmd = ['yelp-build', 'cache', '-o', '-']
    cmd.extend(page_files)
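    # roughly: yelp-build cache -o - <every *.page in the directory>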
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        raise Exception('failed to create initial mallard cache')
    return stdout

def create_united_page(mallard_cache, directory):
    global xrefs
    pages = {}
    index_cache = ET.fromstring(mallard_cache)
    # load pages
    for page_node in index_cache.findall(".//{http://projectmallard.org/1.0/}page"):
        page = Page(page_node)
        pages[page.id] = page
    # sort by sort title so sections are added in a stable, alphabetical order
    xrefs.sort(lambda x, y: cmp(x[2], y[2]))
    # attach every page under the page (or section) its guide link points at
    for page in pages.values():
        page.complete()
    # pages that could not be attached anywhere are gathered in an appendix
    leftovers = []
    for page in pages.values():
        if page.done:
            continue
        if page.id == 'index':
            continue
        leftovers.append(page)
    if leftovers:
        index_page = pages.get('index')
        appendix = ET.SubElement(index_page.xml_node.getroot(), '{http://projectmallard.org/1.0/}section')
        appendix.attrib['id'] = 'appendix'
        ET.SubElement(appendix, '{http://projectmallard.org/1.0/}title').text = 'Annexes'
        for page in leftovers:
            new_section = ET.SubElement(appendix, '{http://projectmallard.org/1.0/}section')
            new_section.extend(page.xml_node.getroot().getchildren())
    pages['index'].xml_node.write(os.path.join(directory, '_united.page'))

def create_united_mallard_cache(directory):
    # rebuild the cache, this time from the single united page
    cmd = ['yelp-build', 'cache', '-o', os.path.join(directory, '_united.cache'),
           os.path.join(directory, '_united.page')]
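    # i.e. yelp-build cache -o DIRECTORY/_united.cache DIRECTORY/_united.page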
    if subprocess.call(cmd) != 0:
        raise Exception('failed to create united mallard cache')

def create_latex(directory, latex_filename):
    # mal2latex.xsl is expected to live next to this script
    xsl_path = os.path.abspath(os.path.dirname(__file__))
    cmd = ['xsltproc', os.path.join(xsl_path, 'mal2latex.xsl'),
           os.path.join(directory, '_united.cache')]
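    # i.e. xsltproc mal2latex.xsl DIRECTORY/_united.cache, captured on stdout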
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        raise Exception('failed to create united LaTeX file')
    file(latex_filename, 'w').write(stdout)

if __name__ == '__main__':
    parser = OptionParser()
    parser.add_option('-d', '--directory', dest='directory',
                      metavar='DIRECTORY', default='.')
    parser.add_option('-l', '--latex', dest='latex', metavar='FILENAME')
    options, args = parser.parse_args()
    directory = options.directory
    mallard_cache = create_mallard_cache(directory)
    create_united_page(mallard_cache, directory)
    create_united_mallard_cache(directory)
    create_latex(directory, options.latex)