unitex.py, to unite Mallard pages into a single hierarchical page

Frédéric Péters 2014-05-16 00:24:03 +02:00
parent 45675cf1a8
commit 258d2d04c2
1 changed file with 149 additions and 0 deletions

unitex.py Executable file

@@ -0,0 +1,149 @@
#! /usr/bin/env python
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
# This script unites Mallard pages into a single hierarchical page, to
# be processed to create a PDF document.
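#
# Typical invocation (paths are illustrative, not taken from this commit):
#   ./unitex.py -d help/C -l help/C/manual.tex
# which writes _united.page and _united.cache into the directory and the
# generated LaTeX document to the path given with -l.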

import xml.etree.ElementTree as ET
import subprocess
from optparse import OptionParser
import os
import sys
import tempfile

# (xref target, Page, sort title) tuples collected from every page's guide link
xrefs = []

class Page:
    # one Mallard page, loaded from an entry in the yelp-build cache
    id = None
    filepath = None
    xml_node = None
    xref = None
    done = False  # set once the page has been attached to a parent page

    def __init__(self, node):
        self.id = node.attrib.get('id')
        self.filepath = node.attrib.get('{http://projectmallard.org/cache/1.0/}href')
        # the cached href is a file:// URL; strip the scheme to get a local path
        self.xml_node = ET.parse(self.filepath[7:])
        self.load_xrefs()

    def load_xrefs(self):
        global xrefs
        # a page declares its parent with <link type="guide" xref="..."/>
        for node in self.xml_node.findall(".//{http://projectmallard.org/1.0/}link[@type='guide']"):
            self.xref = node.attrib.get('xref')
            break
        if self.xref:
            # order under the parent by the sort title if there is one,
            # falling back to the first title
            sort_title = self.xml_node.findall(".//{http://projectmallard.org/1.0/}title")[0].text
            for node in self.xml_node.findall(".//{http://projectmallard.org/1.0/}title[@type='sort']"):
                sort_title = node.text
                break
            xrefs.append((self.xref, self, sort_title))
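            # e.g. a hypothetical page "backup" carrying <link type="guide"
            # xref="index#admin"/> and <title type="sort">Backups</title>
            # ends up here as ('index#admin', <backup Page>, 'Backups')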

    def complete(self):
        # attach every page whose guide xref points at this page, or at one
        # of its sections, as a new subsection
        for xref, page, sort_title in xrefs:
            if xref == self.id:
                new_parent_element = self.xml_node.getroot()
            elif xref.startswith(self.id + '#'):
                section_id = xref.split('#')[1]
                section = self.xml_node.findall(".//*[@id='%s']" % section_id)
                try:
                    new_parent_element = section[0]
                except IndexError:
                    print >> sys.stderr, 'W: missing section:', section_id
                    continue
            else:
                continue
            new_section = ET.SubElement(new_parent_element, '{http://projectmallard.org/1.0/}section')
            new_section.attrib['id'] = 'auto-' + page.id
            new_section.extend(page.xml_node.getroot().getchildren())
            page.done = True
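            # e.g. the hypothetical "backup" page above would become
            # <section id="auto-backup"> inside the element with id "admin"
            # on the index page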

def create_mallard_cache(directory):
    # build a Mallard cache of every page in the directory, written to stdout
    page_files = [os.path.join(directory, x) for x in os.listdir(directory)
                  if x.endswith('.page') and x != '_united.page']
    cmd = ['yelp-build', 'cache', '-o', '-']
    cmd.extend(page_files)
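    # roughly: yelp-build cache -o - <every *.page in the directory>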
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        raise Exception('failed to create initial mallard cache')
    return stdout

def create_united_page(mallard_cache, directory):
    global xrefs
    pages = {}
    index_cache = ET.fromstring(mallard_cache)
    # load pages
    for page_node in index_cache.findall(".//{http://projectmallard.org/1.0/}page"):
        page = Page(page_node)
        pages[page.id] = page
    # sort by sort title so sections are added in a stable, alphabetical order
    xrefs.sort(lambda x, y: cmp(x[2], y[2]))
    # attach every page under the page (or section) its guide link points at
    for page in pages.values():
        page.complete()
    # pages that could not be attached anywhere are gathered in an appendix
    leftovers = []
    for page in pages.values():
        if page.done:
            continue
        if page.id == 'index':
            continue
        leftovers.append(page)
    if leftovers:
        index_page = pages.get('index')
        appendix = ET.SubElement(index_page.xml_node.getroot(), '{http://projectmallard.org/1.0/}section')
        appendix.attrib['id'] = 'appendix'
        ET.SubElement(appendix, '{http://projectmallard.org/1.0/}title').text = 'Annexes'
        for page in leftovers:
            new_section = ET.SubElement(appendix, '{http://projectmallard.org/1.0/}section')
            new_section.extend(page.xml_node.getroot().getchildren())
    pages['index'].xml_node.write(os.path.join(directory, '_united.page'))

def create_united_mallard_cache(directory):
    # rebuild the cache, this time from the single united page
    cmd = ['yelp-build', 'cache', '-o', os.path.join(directory, '_united.cache'),
           os.path.join(directory, '_united.page')]
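    # i.e. yelp-build cache -o DIRECTORY/_united.cache DIRECTORY/_united.page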
    if subprocess.call(cmd) != 0:
        raise Exception('failed to create united mallard cache')

def create_latex(directory, latex_filename):
    # mal2latex.xsl is expected to live next to this script
    xsl_path = os.path.abspath(os.path.dirname(__file__))
    cmd = ['xsltproc', os.path.join(xsl_path, 'mal2latex.xsl'),
           os.path.join(directory, '_united.cache')]
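    # i.e. xsltproc mal2latex.xsl DIRECTORY/_united.cache, captured on stdout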
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        raise Exception('failed to create united LaTeX file')
    file(latex_filename, 'w').write(stdout)

if __name__ == '__main__':
    parser = OptionParser()
    parser.add_option('-d', '--directory', dest='directory',
                      metavar='DIRECTORY', default='.')
    parser.add_option('-l', '--latex', dest='latex', metavar='FILENAME')
    options, args = parser.parse_args()
    directory = options.directory
    mallard_cache = create_mallard_cache(directory)
    create_united_page(mallard_cache, directory)
    create_united_mallard_cache(directory)
    create_latex(directory, options.latex)