This repository has been archived on 2023-02-21. You can view files and clone it, but cannot push or open issues or pull requests.
tabellioOOo/legi2pdf/lib/legi2pdf/pdfGenerator.py

418 lines
15 KiB
Python
Executable File

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Tabellio -- software suite for deliberative assemblies
# -- suite logicielle pour assemblées délibératives
# -- http://www.tabellio.org/
# Copyright (C) 2006 Parlement de la Communauté française de Belgique
# This file is part of Tabellio.
# Tabellio is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# Tabellio is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import sys
import os
import re
import threading
import time
import string
import math
import tempfile
import getopt
import cStringIO
import shutil
import libxml2
import libxslt
import logging
import subprocess
try:
import elementtree.ElementTree as ET
except ImportError:
import xml.etree.ElementTree as ET
# Name of the main XML content entry looked up in an unpacked .legi document.
NAME_CONTENT_LEGI = 'contents.xml'
# from pythonlib
import xmlutils
from xmlutils import parseStream, applyStylesheet2
from epsutils import eps2pdf
from ziputils import unzipToDirectory
from utf8utils import utf8encode
import magic
try:
import PIL
import PIL.Image
except ImportError:
PIL = None
# Module-level lock object available for serialising temporary-file handling.
tempfilelock = threading.Lock()
# Prefix for temporary names, stamped with the time this module was loaded.
# NOTE(review): tempfile.template is a deprecated knob -- confirm that it
# still influences tempfile.mktemp() on the target Python version.
tempfile.template = "F%ld-" % time.time()
# The root logger is used for every message emitted by this module.
log = logging.getLogger("")

# Directory of this module; the stylesheets live two levels above it.
abspath = os.path.abspath(os.path.dirname(__file__))
xslpath = os.path.join(abspath, "..", "..", "xsl")
# Full paths of the XSL stylesheets, all located in ``xslpath``.
(xslPreprocessingFilterName,
 xslPostprocessingFilterName,
 xslMainprocessingFilterName,
 xslAnnexPreprocessingFilterName,
 xslCopyProcessingFilterName) = (
    os.path.join(xslpath, _stylesheet) for _stylesheet in (
        "pre_proc.xsl",
        "post_proc.xsl",
        "main.xsl",
        "annex_pre_proc.xsl",
        "copy_proc.xsl",
    ))
class PdfGeneratorException(Exception):
    """Error raised when a document cannot be converted to PDF.

    The class derives from ``Exception`` so that generic ``except Exception``
    handlers and logging helpers see it; it could previously only be caught
    by naming the class explicitly.

    Attributes:
        message -- human readable description of the failure
    """

    def __init__(self, message=""):
        Exception.__init__(self, message)
        # Kept as an explicit attribute for callers that read ``.message``.
        self.message = message

    def __str__(self):
        return self.message
def _writeToFile(filename, data):
    """Store *data* in *filename*, opened in binary mode.

    Any previous content of the file is replaced; the file is closed again
    even when the write fails.
    """
    with open(filename, "wb") as stream:
        stream.write(data)
def makeTempFileName(suffix="", subdir=None):
    """Return a fresh path below the ``legi2pdf`` temporary directory.

    Arguments:
    suffix -- text appended to the generated name
    subdir -- optional sub-directory (relative to the ``legi2pdf``
              directory) in which the name is generated; it is not created

    The ``legi2pdf`` directory itself is created on demand.  The target
    directory is handed to ``tempfile.mktemp`` explicitly instead of being
    swapped into the process-wide ``tempfile.tempdir``: the previous
    implementation rewrote that global before taking its lock, so two
    threads could observe each other's intermediate value.

    NOTE: only a name is returned, nothing is created on disk, so the caller
    has to create the path promptly (inherent limitation of ``mktemp``).
    """
    baseDir = os.path.abspath(os.path.join(tempfile.gettempdir(), "legi2pdf"))
    try:
        os.mkdir(baseDir)
    except OSError:
        # Already there (possibly created concurrently) is fine; anything
        # else is a genuine error.
        if not os.path.isdir(baseDir):
            raise
    targetDir = baseDir
    if subdir:
        targetDir = os.path.join(baseDir, subdir)
    return tempfile.mktemp(suffix=suffix, dir=targetDir)
def purgeDirectory(d):
    """Remove directory *d* together with everything it contains.

    Sub-directories are removed as well; the previous implementation called
    ``os.remove`` on every entry and therefore failed as soon as the
    directory contained a sub-directory.

    Raises OSError when *d* does not exist or cannot be removed.
    """
    shutil.rmtree(d)
# First unsigned decimal number found in a dimension such as "12.5cm".
bboxRE = re.compile(r'\d+(?:\.\d+)?')


def _convertBboxVal(val):
    """Return the first number found in *val*, rounded, as a decimal string.

    Halves are rounded up ("12.5cm" gives "13"); anything following the
    number, such as a unit, is ignored.

    Raises ValueError when *val* contains no number at all.
    """
    found = bboxRE.search(val)
    if found is None:
        raise ValueError("no numeric value found in %r" % (val,))
    # float() replaces the deprecated string.atof().
    return "%d" % math.floor(float(found.group(0)) + 0.5)
def unzipAnnexToDir(annexes, tempDir):
    """Make every annex available inside *tempDir*.

    Each item of *annexes* is a sequence whose first element is the path of
    the annex file.  ``.legi`` annexes are unpacked with their position in
    the list (``"<index>_"``) passed as prefix; any other file is copied
    under its own base name.
    """
    for position, annex in enumerate(annexes):
        source = annex[0]
        if source.endswith(".legi"):
            unzipToDirectory(source, tempDir, "%d_" % position)
            continue
        target = os.path.join(tempDir, os.path.basename(source))
        shutil.copy(source, target)
def getBboxes(d):
    """Collect the size declared for every image of the document in *d*.

    The content file found in directory *d* is parsed and each ``imagedata``
    element carrying ``fileref``, ``width`` and ``depth`` attributes yields
    one entry.

    Returns a dictionary mapping the ``fileref`` value to a
    ``(width, depth)`` pair of rounded values, as produced by
    ``_convertBboxVal``.
    """
    sizes = {}
    document = None
    context = None
    try:
        document = libxml2.parseFile(os.path.join(d, NAME_CONTENT_LEGI))
        context = document.xpathNewContext()
        for node in context.xpathEval("//imagedata"):
            if node.name != "imagedata":
                continue
            # hasProp() gives the attribute node, or None when it is absent.
            fileref = node.hasProp("fileref")
            width = node.hasProp("width")
            depth = node.hasProp("depth")
            if not (fileref and width and depth):
                continue
            sizes[fileref.getContent()] = (
                _convertBboxVal(width.getContent()),
                _convertBboxVal(depth.getContent()))
    finally:
        # libxml2 objects have to be released explicitly.
        if document is not None:
            document.freeDoc()
        if context is not None:
            context.xpathFreeContext()
    return sizes
def convertWmf2Pdf(d):
    """Convert every ``.wmf`` file of directory *d* to EPS, then to PDF.

    When the document declares a size for the image (see ``getBboxes``) it
    is passed to ``wmf2eps`` as ``--bbox=<width>x<depth>``.  The EPS output
    is written next to the source file and handed to ``eps2pdf``.

    The converter is started from an argument list, without a shell: the
    file names come from the unpacked archive and must not be interpreted by
    a shell (spaces, quotes, ``;`` ...).
    """
    workDir = os.path.abspath(d)
    bboxDict = getBboxes(d)
    for wmfFileName in os.listdir(d):
        if not wmfFileName.endswith(".wmf"):
            continue
        epsPath = os.path.join(workDir, wmfFileName[:-4] + ".eps")
        args = ["/usr/bin/wmf2eps"]
        bbox = bboxDict.get(wmfFileName)
        if bbox is not None:
            args.append("--bbox=%sx%s" % (bbox[0], bbox[1]))
        args.append(os.path.join(workDir, wmfFileName))
        epsFile = open(epsPath, "wb")
        try:
            try:
                subprocess.call(args, stdout=epsFile, cwd=workDir)
            except OSError:
                # A shell would merely have reported a missing converter;
                # stay best-effort and let the next step deal with the
                # (empty) EPS file.
                log.exception("unable to run %s", args[0])
        finally:
            epsFile.close()
        eps2pdf(epsPath)
def insertAnnex(xmlMainDoc, annexes, d):
    """Append the annexes to the first child node of *xmlMainDoc*.

    Arguments:
    xmlMainDoc -- libxml2 document receiving the annexes
    annexes    -- sequence of annex definitions; item[0] is the file path,
                  item[1] the title and, for PDF annexes, item[2] the scale
                  in percent
    d          -- directory holding the unpacked annexes

    A ``.legi`` annex is read from ``<index>_contents.xml`` in *d*, run
    through the annex preprocessing stylesheet and its result is added.  A
    ``.pdf`` annex becomes an ``appendix`` element of type ``pdf`` holding a
    ``title`` and a ``pdf-annex`` element.  Any other kind of file is only
    reported in the log.
    """
    inputFile = None
    bookNode = xmlMainDoc.children
    try:
        for index, annex in enumerate(annexes):
            annexPath = annex[0]
            if annexPath.endswith(".legi"):
                contentName = "%d_%s" % (index, NAME_CONTENT_LEGI)
                inputFile = open(os.path.join(d, contentName), "r")
                annexDoc = parseStream(inputFile, validate=0)
                params = {
                    'annex-id': '%d_' % (index,),
                    'annex-title': '%s' % utf8encode(annex[1]),
                }
                annexDoc = applyStylesheet2(
                    annexDoc, xslAnnexPreprocessingFilterName, params)
                bookNode.addChild(annexDoc.children)
                inputFile.close()
                inputFile = None
            elif annexPath.endswith(".pdf"):
                appendixNode = libxml2.newNode("appendix")
                appendixNode.newProp("type", "pdf")
                titleNode = libxml2.newNode("title")
                title = annex[1]
                if title is not None:
                    title = title.encode("UTF-8")
                titleNode.addContent(title)
                appendixNode.addChild(titleNode)
                pdfNode = libxml2.newNode("pdf-annex")
                pdfNode.newProp("pdf-file", os.path.basename(annexPath))
                scale = annex[2]
                # The scale is given in percent, the attribute holds a ratio.
                pdfNode.newProp("scale", "%.2f" % (scale/100.0))
                appendixNode.addChild(pdfNode)
                bookNode.addChild(appendixNode)
            else:
                log.warn("format d'annexe non supporté %s" % annexPath)
    finally:
        # Only still open when processing a .legi annex failed half-way.
        if inputFile is not None:
            inputFile.close()
# Textual substitutions applied to the generated LaTeX source, in order.
_LATEX_TEXT_REPLACEMENTS = [
    (u'’', u"'"), (u'\u2019', u"'"),
    (u'“', u'\\guillemotleft'), (u'\u201C', u'«'),
    (u'”', u'\\guillemotright'), (u'\u201D', u'»'),
    (u'‑', u'-'), (u'\u2011', u'-'),
    (u'TABELLIO--', u'\\hyp{}'),
]


def _postprocessLatex(doc):
    """Return LaTeX source *doc* with the textual post-processing applied."""
    for before, after in _LATEX_TEXT_REPLACEMENTS:
        doc = doc.replace(before, after)
    # Look for the end of parts and change the multicol environment so that
    # the columns are not balanced: the first \end{multicols} following a
    # marker and the last \begin{multicols} preceding it get the starred form.
    parts = doc.split('\n% end part')
    for i in range(1, len(parts)):
        parts[i] = parts[i].replace(
            r'\end{multicols}', r'\end{multicols*}', 1)
        # Replacing the first match in the reversed text replaces the last
        # occurrence in the original one.
        reversedPrevious = parts[i-1][::-1]
        reversedPrevious = reversedPrevious.replace(
            r'\begin{multicols}'[::-1], r'\begin{multicols*}'[::-1], 1)
        parts[i-1] = reversedPrevious[::-1]
    return '\n% adjusted end part'.join(parts)


def convertLegi2Tex(input, outputFileName, annexes, d, draft=0, toc=True,
                    style="normal", useFont=None, legacyMode=False):
    """Convert the XML content of a .legi document into a LaTeX file.

    Arguments:
    input          -- XML content of the document, as a string
    outputFileName -- name of the LaTeX file written inside *d*
    annexes        -- annex definitions, passed on to insertAnnex
    d              -- working directory
    draft          -- when true, the stylesheet gets 'with-draft-tag'
    toc            -- when true, the stylesheet gets 'with-toc'
    style          -- "parchment" and "bqr" switch on the matching
                      stylesheet parameter; an "ooo-" prefix is removed and
                      turns legacyMode off
    useFont        -- overrides the font derived from the document keyword
    legacyMode     -- use the stylesheets of the "xsl-legacy" directory

    Side effect: the module-level stylesheet paths (xslpath and the xsl*Name
    variables) are rebound according to legacyMode, which also determines
    the stylesheet used by insertAnnex.

    NOTE(review): the libxml2 documents created here are never released with
    freeDoc(); confirm whether that matters for long running processes.
    """
    global xslpath, xslPreprocessingFilterName, xslPostprocessingFilterName, \
        xslMainprocessingFilterName, xslAnnexPreprocessingFilterName, \
        xslCopyProcessingFilterName

    outputStream = None
    if style.startswith('ooo-'):
        legacyMode = False
        style = style[4:]

    if legacyMode:
        xslpath = os.path.join(abspath, "..", "..", "xsl-legacy")
    else:
        xslpath = os.path.join(abspath, "..", "..", "xsl")
    xslPreprocessingFilterName = os.path.join(xslpath, "pre_proc.xsl")
    xslPostprocessingFilterName = os.path.join(xslpath, "post_proc.xsl")
    xslMainprocessingFilterName = os.path.join(xslpath, "main.xsl")
    xslAnnexPreprocessingFilterName = os.path.join(xslpath, "annex_pre_proc.xsl")
    xslCopyProcessingFilterName = os.path.join(xslpath, "copy_proc.xsl")

    # The document keyword selects the font and the document class.
    xslParam = {}
    tree = ET.fromstring(input)
    for prop in tree.findall('metadata/property'):
        if prop.attrib.get('name') != 'keyword':
            continue
        # An empty element has no text (None): treat it as "not PFB".
        if 'PFB' in (prop.text or ''):
            xslParam['latex.document.font'] = 'helvet'
            xslParam['latex.documentclass'] = 'PFBstd'
        else:
            xslParam['latex.document.font'] = 'sabon'
            xslParam['latex.documentclass'] = 'PCFstd'
    if useFont:
        # override selected font
        xslParam['latex.document.font'] = useFont

    try:
        # preprocessing
        outputStream = open(os.path.join(d, outputFileName), "w")
        xml_doc = libxml2.parseDoc(input)
        xsl_style = libxslt.parseStylesheetFile(xslPreprocessingFilterName)
        preprocessed_xml_doc = xsl_style.applyStylesheet(xml_doc, {})
        xsl_style.freeStylesheet()
        insertAnnex(preprocessed_xml_doc, annexes, d)

        ctxt = preprocessed_xml_doc.xpathNewContext()
        # some custom preprocessing of text content:
        # marks -- as TABELLIO-- so substitution with the proper command
        # (\hyp{}) can happen in the postprocessing phase.
        for node in ctxt.xpathEval('//text()'):
            if '--' in node.content:
                node.setContent(node.content.replace('--', 'TABELLIO--'))
        ctxt.xpathFreeContext()

        # processing
        if draft:
            xslParam['with-draft-tag'] = '1'
        if toc:
            xslParam['with-toc'] = '1'
        if style == "parchment":
            xslParam['parchemin'] = '1'
        if style == "bqr":
            xslParam['bqr'] = '1'
        for k, v in list(xslParam.items()):
            xslParam[k] = xmlutils.makeparam(v)
        xsl_style = libxslt.parseStylesheetFile(xslMainprocessingFilterName)
        processed_xml_doc = xsl_style.applyStylesheet(preprocessed_xml_doc, xslParam)
        xml_latex_doc = xsl_style.saveResultToString(processed_xml_doc)
        xsl_style.freeStylesheet()

        # postprocessing: the LaTeX source is the text of the root element
        doc = _postprocessLatex(ET.fromstring(xml_latex_doc).text)
        outputStream.write(doc.encode('utf-8'))
    finally:
        if outputStream is not None:
            outputStream.close()
def copy_extra_files(dest):
    """Copy every regular file of the ``extra`` directory into *dest*.

    The ``extra`` directory is located two levels above this module.
    Sub-directories and other non-regular entries are skipped.  Files are
    copied byte for byte with ``shutil.copyfile``, which also closes the
    handles that were previously left open.
    """
    extra_dir = os.path.join(abspath, '..', '..', 'extra')
    for filename in os.listdir(extra_dir):
        src = os.path.join(extra_dir, filename)
        if not os.path.isfile(src):
            continue
        shutil.copyfile(src, os.path.join(dest, filename))
def convertTex2Pdf(d, latexFileName):
    """Compile *latexFileName* with pdflatex inside directory *d*.

    The extra support files are first copied into *d*; the compiler is then
    run three times in a row, in non-stop mode.  Its exit status is ignored.
    """
    command = ["pdftex --fmt=pdflatex --interaction=nonstopmode %s" % (latexFileName)]
    copy_extra_files(d)
    for _ in range(3):
        subprocess.call(command, cwd=d, shell=True)
def convertLegi2Pdf(inputFileName, pdfFileName, latexFileName, keepLatex=0,
                    debug=0, annexes=None, draft=0, toc=True, style="normal",
                    useFont=None, legacyMode=True, grayscale=True):
    """
    Generate a PDF document from an input .legi document.

    Arguments:
    inputFileName -- name of the input file (.legi archive or plain XML)
    pdfFileName -- name of the output file (pdf)
    latexFileName -- name of the output file (LaTeX)
    keepLatex -- keep the LaTeX file
    debug -- keep the temporary files
    annexes -- annex definitions (defaults to no annex)
    draft -- add a draft stamp
    toc -- enable the table of contents
    style -- special style (normal, parchment, bqr)
    useFont -- document font, overrides the document class definition
    legacyMode -- use the legacy xsl conversion files
    grayscale -- convert .jpg and .png images to grayscale (needs PIL)

    Raises PdfGeneratorException when the input cannot be read, has an
    unsupported format, or when no PDF file could be produced.
    """
    if annexes is None:
        annexes = []
    tempDir = None
    f = None
    try:
        tempDir = makeTempFileName(".legi2pdf")
        os.mkdir(tempDir)
        f = open(inputFileName)
        fmt = magic.fileFormat(f)
        copy_extra_files(tempDir)
        if fmt == "zip":
            allEntries = unzipToDirectory(inputFileName, tempDir)
            if allEntries is None:
                raise PdfGeneratorException(
                    'Unable to open or to read the legi file: %s' % (inputFileName))
            contentEntryInfo, contentEntryData = allEntries.get(
                NAME_CONTENT_LEGI, (None, None))
            if contentEntryInfo is None or contentEntryData is None:
                raise PdfGeneratorException(
                    'Corrupted legi file: No %s entry' % (NAME_CONTENT_LEGI))
            if grayscale and PIL:
                for filename in os.listdir(tempDir):
                    if os.path.splitext(filename)[-1] not in ('.jpg', '.png'):
                        continue
                    # image, convert it to grayscale (in place)
                    imagePath = os.path.join(tempDir, filename)
                    image = PIL.Image.open(imagePath)
                    grayscaled = image.convert('L')
                    grayscaled.save(imagePath)
        elif fmt == "xml":
            contentEntryData = f.read()
            _writeToFile(os.path.join(tempDir, NAME_CONTENT_LEGI), contentEntryData)
        else:
            raise PdfGeneratorException(
                'Unsupported input format: %s (should be: xml or zip)' % (fmt))
        unzipAnnexToDir(annexes, tempDir)
        convertLegi2Tex(contentEntryData, "temp.tex", annexes, tempDir, draft,
                        toc, style, useFont, legacyMode)
        convertWmf2Pdf(tempDir)
        convertTex2Pdf(tempDir, "temp.tex")
        if os.path.exists(pdfFileName):
            os.remove(pdfFileName)
        if not os.path.exists(os.path.join(tempDir, 'temp.pdf')):
            raise PdfGeneratorException('legi2pdf failed to create a pdf file')
        shutil.move(os.path.join(tempDir, "temp.pdf"), pdfFileName)
    finally:
        try:
            # tempDir is still None when no temporary name could be obtained;
            # there is nothing to salvage or to clean up in that case.
            if tempDir is not None:
                if keepLatex:
                    if os.path.exists(latexFileName):
                        os.remove(latexFileName)
                    texPath = os.path.join(tempDir, "temp.tex")
                    if os.path.exists(texPath):
                        shutil.move(texPath, latexFileName)
                # The directory may not exist if its creation failed.
                if not debug and os.path.isdir(tempDir):
                    purgeDirectory(tempDir)
        finally:
            # Close the input file even when the cleanup above fails.
            if f is not None:
                f.close()