From 258d2d04c24df81f1f0ddccce80462ea03bebdce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20P=C3=A9ters?= Date: Fri, 16 May 2014 00:24:03 +0200 Subject: [PATCH] unitex.py, to unite Mallard pages into a single hierarchical page --- unitex.py | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100755 unitex.py diff --git a/unitex.py b/unitex.py new file mode 100755 index 0000000..65db17f --- /dev/null +++ b/unitex.py @@ -0,0 +1,149 @@ +#! /usr/bin/env python +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/>.
import xml.etree.ElementTree as ET
import subprocess
from optparse import OptionParser
import os
import sys
import tempfile

# Global list of (xref, Page, sort_title) triples, filled while pages load
# and consumed by Page.complete() to graft pages under their guide page.
xrefs = []


class Page:
    """One Mallard page loaded from a yelp-build cache entry."""

    def __init__(self, node):
        # node: a <page> element from the yelp-build cache document.
        self.id = node.attrib.get('id')
        self.filepath = node.attrib.get('{http://projectmallard.org/cache/1.0/}href')
        # the cache href is a file:// URL; strip the 7-char scheme prefix
        # to get a local filesystem path for ElementTree
        self.xml_node = ET.parse(self.filepath[7:])
        self.xref = None    # id of the guide page this page attaches to, if any
        self.done = False   # set once the page has been grafted somewhere
        self.load_xrefs()

    def load_xrefs(self):
        """Record this page's guide xref (if any) in the global xrefs list.

        The sort key is the page title, unless an explicit <title type="sort">
        overrides it.
        """
        guide_link = self.xml_node.find(
                ".//{http://projectmallard.org/1.0/}link[@type='guide']")
        if guide_link is not None:
            self.xref = guide_link.attrib.get('xref')
        if self.xref:
            sort_title = self.xml_node.findall(
                    './/{http://projectmallard.org/1.0/}title')[0].text
            sort_node = self.xml_node.find(
                    ".//{http://projectmallard.org/1.0/}title[@type='sort']")
            if sort_node is not None:
                sort_title = sort_node.text
            xrefs.append((self.xref, self, sort_title))

    def complete(self):
        """Graft every page whose guide xref targets this page.

        An xref equal to this page's id attaches at the page root; an xref of
        the form 'id#section' attaches under that section.  Grafted pages are
        marked done so they are not also dumped into the appendix.
        """
        for xref, page, sort_title in xrefs:
            if xref == self.id:
                new_parent_element = self.xml_node.getroot()
            elif xref.startswith(self.id + '#'):
                section_id = xref.split('#')[1]
                section = self.xml_node.findall(".//*[@id='%s']" % section_id)
                try:
                    new_parent_element = section[0]
                except IndexError:
                    # warn but keep going; works on both Python 2 and 3,
                    # unlike the former "print >> sys.stderr" statement
                    sys.stderr.write('W: missing section: %s\n' % section_id)
                    continue
            else:
                continue

            new_section = ET.SubElement(new_parent_element,
                    '{http://projectmallard.org/1.0/}section')
            new_section.attrib['id'] = 'auto-' + page.id
            # list(element) replaces getchildren(), removed in Python 3.9
            new_section.extend(list(page.xml_node.getroot()))
            page.done = True


def create_mallard_cache(directory):
    """Run yelp-build over every .page file in directory and return the
    resulting Mallard cache document as a byte string.

    The previous output '_united.page' is excluded so reruns stay idempotent.
    Raises Exception if yelp-build exits non-zero.
    """
    page_files = [os.path.join(directory, x) for x in os.listdir(directory)
                  if x.endswith('.page') and x != '_united.page']
    cmd = ['yelp-build', 'cache', '-o', '-']
    cmd.extend(page_files)
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        raise Exception('failed to create initial mallard cache')
    return stdout
def create_united_page(mallard_cache, directory):
    """Unite all pages described by mallard_cache into one hierarchical
    '_united.page' file written into directory.

    Pages that declare a guide xref are grafted under their guide page
    (sorted by sort title); any page nobody claimed ends up in an
    'Annexes' appendix section of the index page.
    """
    pages = {}

    index_cache = ET.fromstring(mallard_cache)

    # load pages (Page.__init__ registers guide xrefs as a side effect)
    for page_node in index_cache.findall(".//{http://projectmallard.org/1.0/}page"):
        page = Page(page_node)
        pages[page.id] = page

    # sort by sort title; key= works on Python 2 and 3 (cmp() is py2-only)
    xrefs.sort(key=lambda entry: entry[2])

    for page in pages.values():
        page.complete()

    # pages never grafted anywhere (except the index itself) go to an appendix
    leftovers = [page for page in pages.values()
                 if not page.done and page.id != 'index']

    if leftovers:
        index_page = pages.get('index')
        appendix = ET.SubElement(index_page.xml_node.getroot(),
                '{http://projectmallard.org/1.0/}section')
        appendix.attrib['id'] = 'appendix'
        ET.SubElement(appendix,
                '{http://projectmallard.org/1.0/}title').text = 'Annexes'
        for page in leftovers:
            new_section = ET.SubElement(appendix,
                    '{http://projectmallard.org/1.0/}section')
            # list(element) replaces getchildren(), removed in Python 3.9
            new_section.extend(list(page.xml_node.getroot()))

    pages['index'].xml_node.write(os.path.join(directory, '_united.page'))


def create_united_mallard_cache(directory):
    """Run yelp-build on '_united.page' to produce '_united.cache'.

    Raises Exception if yelp-build exits non-zero.
    """
    cmd = ['yelp-build', 'cache', '-o', os.path.join(directory, '_united.cache'),
           os.path.join(directory, '_united.page')]
    if subprocess.call(cmd) != 0:
        raise Exception('failed to create united mallard cache')


def create_latex(directory, latex_filename):
    """Convert '_united.cache' to LaTeX with xsltproc (using the
    mal2latex.xsl stylesheet shipped next to this script) and write the
    result to latex_filename.

    Raises Exception if xsltproc exits non-zero.
    """
    xsl_path = os.path.abspath(os.path.dirname(__file__))
    cmd = ['xsltproc', os.path.join(xsl_path, 'mal2latex.xsl'),
           os.path.join(directory, '_united.cache')]
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        raise Exception('failed to create united LaTeX file')
    # binary mode: Popen output is bytes on Python 3; the with-statement
    # also closes the handle the py2-only file() builtin used to leak
    with open(latex_filename, 'wb') as output:
        output.write(stdout)


if __name__ == '__main__':
    parser = OptionParser()
    parser.add_option('-d', '--directory', dest='directory',
                      metavar='DIRECTORY', default='.')
    parser.add_option('-l', '--latex', dest='latex', metavar='FILENAME')

    options, args = parser.parse_args()

    # fail early with a usage message instead of a traceback deep in
    # create_latex() when no output filename was given
    if not options.latex:
        parser.error('output LaTeX filename is required (-l FILENAME)')

    directory = options.directory
    mallard_cache = create_mallard_cache(directory)
    create_united_page(mallard_cache, directory)
    create_united_mallard_cache(directory)
    create_latex(directory, options.latex)