# library-web/src/modtypes/mallard.py

# libgo - script to build library.gnome.org
# Copyright (C) 2007-2009  Frederic Peters
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301  USA

import glob
import logging
import os
import re
import shutil
import stat
import subprocess
import tarfile
import tempfile

try:
    import elementtree.ElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET

import xml.parsers.expat

import errors
from base import DocModule

def dup_node(e):
    '''Return an independent copy of element e.

    The copy is obtained by serialising e and parsing the result again, so
    changing the returned element never affects e.
    '''
    serialized = ET.tostring(e)
    return ET.fromstring(serialized)


# XML namespace URI used to build the qualified Mallard tag names
# ('{namespace}page', '{namespace}info', ...) looked up in this module
MAL_NS = 'http://projectmallard.org/1.0/'

class NotAMallardPageException(Exception):
    '''Raised when the root element of a parsed file is not a Mallard page.'''

class MallardPage:
    '''Digest of a single Mallard page: id, titles, info nodes and sections.

    An instance is built from a page file and can be turned back, with
    toxml(), into the reduced <page> element that goes in a cache file.
    '''
    id = None
    link_title = None
    sort_title = None
    # <title> child of the page; stays None when the page has no id or no
    # title, so that toxml() can always test it
    title_tag = None
    info_nodes = None
    sections = None

    def __init__(self, filename):
        '''Parse filename and collect the page id, titles and sections.

        Raises NotAMallardPageException when the root element is not a
        Mallard <page>.  When the page has no id attribute, self.id is left
        to None and nothing else is collected.
        '''
        self.info_nodes = []
        self.sections = []

        root = ET.parse(filename).getroot()
        if root.tag != '{%s}page' % MAL_NS:
            raise NotAMallardPageException()

        self.id = root.attrib.get('id')
        if self.id is None:
            return

        self.page_info(root.find('{%s}info' % MAL_NS))
        self.title_tag = root.find('{%s}title' % MAL_NS)
        if self.title_tag is not None:
            # titles found in <info> have priority, the page title is only
            # used to fill in the ones that are still missing
            if self.link_title is None:
                self.link_title = dup_node(self.title_tag)
                self.link_title.attrib['type'] = 'link'
            if self.sort_title is None:
                self.sort_title = dup_node(self.title_tag)
                self.sort_title.attrib['type'] = 'sort'

        for section in root.findall('{%s}section' % MAL_NS):
            title_tag = section.find('{%s}title' % MAL_NS)
            # sections without a title or without an id are ignored
            if title_tag is None or not section.attrib.get('id'):
                continue
            self.sections.append({'id': section.attrib['id'],
                                  'title': dup_node(title_tag),
                                  'link_title': dup_node(title_tag),
                                  'sort_title': dup_node(title_tag)})

    def page_info(self, element):
        '''Collect titles and info nodes from the <info> element of the page.

        element may be None (page without <info>), nothing happens then.
        Only the first <title> of <info> is considered for the link and sort
        titles; all <desc>, <link> and <title> children are kept, in that
        order, in self.info_nodes.
        '''
        if element is None:
            return
        title_tag = element.find('{%s}title' % MAL_NS)
        if title_tag is not None:
            title_type = title_tag.attrib.get('type', 'link')
            role = title_tag.attrib.get('role')
            if title_type == 'link' and not role:
                self.link_title = dup_node(title_tag)
            if title_type == 'sort':
                self.sort_title = dup_node(title_tag)
        for tag_name in ('desc', 'link', 'title'):
            self.info_nodes.extend(
                    element.findall('{%s}%s' % (MAL_NS, tag_name)))

    def toxml(self):
        '''Return the <page> element describing this page in a cache file.'''
        e = ET.Element('{%s}page' % MAL_NS)
        e.attrib['id'] = self.id
        info = ET.SubElement(e, '{%s}info' % MAL_NS)
        for node in self.info_nodes:
            info.append(node)
        if self.link_title is not None:
            info.append(self.link_title)
        if self.sort_title is not None:
            info.append(self.sort_title)
        # compare against None: an element without child elements is false
        # in a boolean context, and a plain text title has no children
        if self.title_tag is not None:
            e.append(dup_node(self.title_tag))
        for section in self.sections:
            section_node = ET.SubElement(e, '{%s}section' % MAL_NS)
            section_node.attrib['id'] = '%s#%s' % (self.id, section.get('id'))
            section_info = ET.SubElement(section_node, '{%s}info' % MAL_NS)
            section.get('link_title').attrib['type'] = 'link'
            section.get('sort_title').attrib['type'] = 'sort'
            section_info.append(section.get('link_title'))
            section_info.append(section.get('sort_title'))
            section_node.append(section.get('title'))
        return e


class MallardCache:
    '''Collection of MallardPage objects, serialisable as one cache element.'''

    def __init__(self):
        self.all_pages = list()

    def add_page(self, filename):
        '''Parse filename as a MallardPage and remember it.

        Returns the page, or None (without keeping it) when it has no id.
        '''
        parsed = MallardPage(filename)
        if parsed.id is not None:
            self.all_pages.append(parsed)
            return parsed
        return None

    def toxml(self):
        '''Return a <cache> element holding the XML form of every page.'''
        cache_node = ET.Element('{%s}cache' % MAL_NS)
        for entry in self.all_pages:
            cache_node.append(entry.toxml())
        return cache_node

    def dump(self, filename):
        '''Write the cache element to filename.'''
        ET.ElementTree(self.toxml()).write(filename)


class MallardModule(DocModule):
    '''Class for Mallard documentation shipped in a tarball

    The pages, languages and figures of the module are read from the
    DOC_* / HELP_* variables of its Makefile.am, and each page is formatted
    to HTML with xsltproc.
    '''
    # stylesheet given to xsltproc to format a Mallard page
    mal2html_xsl_file = os.path.join(data_dir, 'xslt', 'mal2html.xsl')
    # category read from the .omf.in file of the module, if there is one
    category = None

    # NOTE(review): presumably the stylesheets whose modification times end
    # up in self.mtime_xslt_files -- confirm against DocModule
    related_xsl_files = ['mal2html.xsl', 'heading.xsl']

    def setup_channel(self):
        '''Set self.category and self.channel for the module.

        The category comes from the first *.omf.in file found in the
        extracted module directory (or in its C/ subdirectory); it is left
        untouched when there is no such file, or when the file has no
        resource/subject element carrying a category attribute.  The default
        channel ('users', or 'devel' for development categories) is then
        passed through the application overlay.
        '''
        # get category from omf file
        ext_dirname = os.path.join(app.config.private_dir, 'extracts')
        module_dirname = os.path.join(ext_dirname, self.dirname)
        omf_file = glob.glob(module_dirname + '/*.omf.in')
        if not omf_file:
            omf_file = glob.glob(module_dirname + '/C/*.omf.in')
        if omf_file:
            subject = ET.parse(omf_file[0]).find('resource/subject')
            # find() returns None when the element is missing; check for it
            # instead of failing on the attribute access
            if subject is not None:
                category = subject.attrib.get('category')
                if category is not None:
                    self.category = category

        channel = 'users'
        if self.category and (self.category.startswith('GNOME|Development') or
                self.category.startswith('GNOME|Applications|Programming')):
            channel = 'devel'

        self.channel = app.overlay.get_channel_overlay(self.modulename, channel)
        if self.channel == 'misc' and not self.category:
            self.category = 'GNOME|Other'

    def __str__(self):
        '''Return a short label naming the directory of the module.'''
        location = self.dirname
        return 'mallard module at %s' % location

    def expand_doc_figures(self, doc_figures):
        '''Expand, in place, the make variables referenced in doc_figures.

        '$(NULL)' entries are dropped; any other entry starting with '$' is
        replaced by the words of the first "NAME = value" assignment found
        for it in self.makefile_am (expanded values are put after the plain
        entries, and are not expanded again).

        Raises IndexError when a referenced variable has no such assignment;
        doc_figures is left unmodified in that case.
        '''
        plain = []
        expanded = []
        for figure in doc_figures:
            if figure == '$(NULL)': # common idiom
                continue
            if not figure.startswith('$'):
                plain.append(figure)
                continue
            var_name = figure[2:-1] # $(blah) -> blah
            # the name is escaped as it must be matched literally
            pattern = r'%s\s+=\s+(.*)' % re.escape(var_name)
            expanded.extend(re.findall(pattern, self.makefile_am)[0].split())
        # callers rely on the list they passed being modified
        doc_figures[:] = plain + expanded

    def process(self):
        '''Format the pages of the module to HTML, for every language.

        Languages, pages and figures are read from the DOC_*/HELP_*
        variables of self.makefile_am; nothing is done when no list of pages
        is found, or when get_libgo_document() returns nothing.  For each
        language a Mallard cache file is created and xsltproc is run on every
        page, then figures are copied next to the HTML files.  A language is
        dropped from the document when one of its pages is missing, and is
        skipped when its index file is newer than both the tarball and the
        stylesheets (unless a rebuild is requested).
        '''
        ext_dirname = os.path.join(app.config.private_dir, 'extracts')
        module_dirname = os.path.join(ext_dirname, self.dirname)
        c_dirname = os.path.join(module_dirname, 'C')

        # ElementTree raises ExpatError or ParseError on malformed XML,
        # depending on its version; handle both
        parse_errors = (xml.parsers.expat.ExpatError,
                getattr(ET, 'ParseError', xml.parsers.expat.ExpatError))

        try:
            doc_linguas = re.findall(r'(?:DOC_LINGUAS|HELP_LINGUAS)\s+=[\t ](.*)',
                    self.makefile_am)[0].split()
        except IndexError:
            doc_linguas = ['en']
        else:
            if 'en' not in doc_linguas:
                doc_linguas.append('en')

        try:
            doc_pages = re.findall(r'(?:DOC_PAGES|HELP_FILES)\s+=\s+(.*)',
                                   self.makefile_am)[0].split()
        except IndexError:
            return

        try:
            doc_figures = re.findall(r'(?:DOC_FIGURES|HELP_FIGURES)\s+=\s+(.*)',
                    self.makefile_am)[0].split()
            self.expand_doc_figures(doc_figures)
        except IndexError:
            # no list of figures, or a variable that could not be expanded
            doc_figures = None
        else:
            for doc_figure in doc_figures:
                if not os.path.exists(os.path.join(c_dirname, doc_figure)):
                    logging.warning('figure (%s) listed but not present, going to autodiscover' % \
                        doc_figure)
                    doc_figures = None
                    break

        if doc_figures is None:
            # autodiscover images in C/figures/ and C/images/
            doc_figures = []
            for subdir in ('figures', 'images'):
                subdir_path = os.path.join(c_dirname, subdir)
                if os.path.exists(subdir_path):
                    doc_figures += [os.path.join(subdir, x)
                            for x in os.listdir(subdir_path)
                            if os.path.splitext(x)[1] in ('.png', '.jpg', '.jpeg')]

        doc_linguas.sort()
        if app.config.languages:
            allowed_languages = app.config.languages + ['C']
            doc_linguas = [x for x in doc_linguas if x in allowed_languages]

        doc = self.get_libgo_document(doc_linguas)
        if not doc:
            return

        if self.category:
            doc.category = self.category
            doc.toc_id = app.toc_mapping.get(doc.category)

        web_output_dir = app.get_module_web_output_dir(self, versioned=True)

        logging.info('generating doc in %s' % web_output_dir[len(app.config.output_dir):])
        if not os.path.exists(web_output_dir):
            os.makedirs(web_output_dir)

        # iterate over a copy, as languages with a missing page are removed
        # from doc.languages along the way
        for lang in list(doc.languages):
            if lang == 'en' and not os.path.exists(os.path.join(module_dirname, 'en')):
                lang_dirname = c_dirname
            else:
                lang_dirname = os.path.join(module_dirname, lang)

            mallard_cache = MallardCache()
            # iterate over a copy here too, files that are not Mallard pages
            # are removed from doc_pages
            for doc_page in doc_pages[:]:
                xml_file = os.path.join(lang_dirname, doc_page)
                if not os.path.exists(xml_file):
                    if lang in doc.languages:
                        doc.languages.remove(lang)
                    logging.warning('failed to find %s for lang %s' % (xml_file, lang))
                    continue
                try:
                    page = mallard_cache.add_page(xml_file)
                except parse_errors:
                    logging.warning('failed to add %s to mallard cache file for lang %s' % (
                                            xml_file, lang))
                    continue
                except NotAMallardPageException:
                    logging.warning('failed processing %s, not a mallard page' % xml_file)
                    doc_pages.remove(doc_page)
                    continue
                # add_page() returns None for a page without id, and a page
                # without title has no link title
                if doc_page == 'index.page' and page is not None and \
                        page.link_title is not None:
                    doc.title[lang] = self._mallard_plain_title(page.link_title)
            if lang not in doc.languages:
                continue

            temporary = tempfile.NamedTemporaryFile()
            try:
                mallard_cache.dump(temporary.name)

                html_index_file = os.path.join(web_output_dir, 'index.html.%s' % lang)
                if not app.rebuild_all and (
                        app.rebuild_language is None or
                        lang != app.rebuild_language) and os.path.exists(html_index_file):
                    mtime = os.stat(html_index_file)[stat.ST_MTIME]
                    if mtime > max(self.mtime_tarball, self.mtime_xslt_files):
                        continue

                self._mallard_format_pages(doc_pages, lang, lang_dirname,
                        web_output_dir, temporary.name)
            finally:
                # closing the temporary file removes it
                temporary.close()

            if doc_figures:
                # and copy images/
                logging.debug('copying figures')
                self._mallard_copy_figures(doc_figures, lang, lang_dirname,
                        c_dirname, web_output_dir)

        self.install_version_symlinks(doc)

    @staticmethod
    def _mallard_plain_title(title):
        '''Return the text of a title element, ignoring <media> content.'''
        # XXX: this should be removed, Mallard provides a text
        # version of the title in <info>
        t = title.text or ''
        for element in title:
            if element.tag != '{%s}media' % MAL_NS and element.text:
                t += ' '
                t += element.text
            # the tail is text of the title itself, keep it even when the
            # content of the child element is skipped
            if element.tail:
                t += element.tail
        return t.strip()

    def _mallard_format_pages(self, doc_pages, lang, lang_dirname,
            web_output_dir, cache_filename):
        '''Run xsltproc on every page of doc_pages for the given language.'''
        options = ['--nonet']
        if self.nightly:
            options += ['--param', 'libgo.nightly', 'true()']
        options += ['--xinclude',
                '--stringparam', 'libgo.mallard.html.basedir', web_output_dir,
                '--stringparam', 'mal.cache.file', cache_filename,
                '--stringparam', 'libgo.lang', lang,
                '--stringparam', 'libgo.channel', self.channel]
        if app.config.symbols_dbm_filepath:
            options += ['--param', 'libgo.dbm_support', 'true()']

        for doc_page in doc_pages:
            xml_file = os.path.join(lang_dirname, doc_page)

            # format mallard page into html file
            cmd = ['xsltproc'] + options + [self.mal2html_xsl_file, xml_file]

            logging.debug('executing %s' % ' '.join(cmd))
            rc = subprocess.call(cmd)
            if rc != 0:
                logging.warning('%s failed with error %d' % (' '.join(cmd), rc))

    def _mallard_copy_figures(self, doc_figures, lang, lang_dirname,
            c_dirname, web_output_dir):
        '''Copy the figures of a language into the web output directory.'''
        for doc_figure in doc_figures:
            src = os.path.join(lang_dirname, doc_figure)
            if not os.path.exists(src):
                # fallback to image from C locale.
                src = os.path.join(c_dirname, doc_figure)
                if not os.path.exists(src):
                    continue
            if os.path.splitext(src)[-1] == '.catalog':
                # the .catalog file should be unique for all
                # translations
                dst = os.path.join(web_output_dir, doc_figure)
            else:
                dst = os.path.join(web_output_dir, doc_figure + '.%s' % lang)
            dst_dirname = os.path.dirname(dst)
            if not os.path.exists(dst_dirname):
                os.makedirs(dst_dirname)
            # byte for byte copy, images must not go through text mode
            shutil.copyfile(src, dst)