debian-python-odf/contrib/odf2epub/odf2epub

351 lines
13 KiB
Python
Executable File

#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2010 Søren Roug, European Environment Agency
#
# This is free software. You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
# 2 or at your option any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Contributor(s):
#
from odf.odf2xhtml import ODF2XHTML
from odf.namespaces import TEXTNS, XLINKNS
from odf.opendocument import load
import sys, getopt, time, zipfile
from StringIO import StringIO
from cgi import escape
# Unix mode bits for a regular file with permissions -rw-r--r--, shifted into
# the high 16 bits because the value is assigned to ZipInfo.external_attr.
# Written with the 0o prefix and without the long suffix so that the literal
# parses on Python 2.6+ as well as on Python 3; the value is unchanged.
UNIXPERMS = 0o100644 << 16  # -rw-r--r--
def escaped(string):
    """ Return `string` escaped for XML and encoded as pure US-ASCII.

        The characters &, < and > are replaced by entities. The double quote
        is replaced as well, because the result is interpolated into
        attribute values and not only into element content. Characters
        outside US-ASCII become numeric character references.
    """
    # The second argument turns on quote escaping.
    return escape(string, True).encode('us-ascii', 'xmlcharrefreplace')
def usage():
    """ Write the command-line synopsis to standard error """
    synopsis = "Usage: %s [-c cover-image] [-o output-file] [-p] inputfile\n"
    program = sys.argv[0]
    sys.stderr.write(synopsis % program)
class NavpointEntry:
    """ Plain record describing one heading for the navigation map.

        anchor  -- anchor name of the heading inside its chapter file
        title   -- text of the heading
        chapter -- number of the chapter file that holds the heading
        level   -- outline level of the heading
    """

    def __init__(self, anchor, title, chapter, level):
        # The values are stored exactly as given; nothing is validated here.
        self.anchor, self.title = anchor, title
        self.chapter, self.level = chapter, level
class ODF2EPUB(ODF2XHTML):
    """ ODF to XHTML converter that splits its output into chapters.

        A new chapter is started at every level 1 heading. Finished chapters
        are collected as strings in self.chapters, and every heading is
        recorded as a NavpointEntry in self.navpoint_list.
    """
    in_toc = False
    cur_html_name = 'chapter0.xhtml'

    def __init__(self, generate_css=True, embedable=False):
        """ Set up the converter; both arguments are passed on to ODF2XHTML """
        ODF2XHTML.__init__(self, generate_css, embedable)
        # These lists are filled with append(), so each instance needs its
        # own. As class attributes they would be shared by all converters.
        self.navpoint_list = []
        self.chapters = []
        self.headerpart = []
        self.elements[(TEXTNS, "table-of-content")] = (self.s_text_table_of_content, self.e_text_table_of_content)

    def s_text_table_of_content(self, tag, attrs):
        """ Note that we are now inside the table of contents """
        self.in_toc = True

    def e_text_table_of_content(self, tag, attrs):
        """ Note that the table of contents has ended """
        self.in_toc = False

    # NOTE: navigation points are collected from the headings in e_text_h().
    # Collecting them from the links inside the table of contents is not
    # implemented.

    def s_text_h(self, tag, attrs):
        """ Handle a heading
            If the heading is a level 1 heading, then split the HTML file.
            We have to be careful, because the heading can be inside a frame or a table.
        """
        level = int(attrs[(TEXTNS,'outline-level')])
        if level == 1:
            # Work on a copy: closetag() is called for every entry below,
            # while the copy keeps the full list of tags for re-opening.
            tags_to_keep = self.htmlstack[:]
            # Close every open tag, innermost first
            for htag, hattrs, hblock in reversed(tags_to_keep):
                if htag == 'body':
                    # Footnotes belong to the chapter that is being closed
                    self.generate_footnotes()
                    self._resetfootnotes()
                self.closetag(htag)
            # I have to do this rather ugly, as the saved header part doesn't
            # go through the self.opentag() method
            self.chapters.append(''.join(self.headerpart + self.lines))
            self.lines = []
            self.htmlstack = tags_to_keep[:2] # Only <html> and <body>
            # Re-open whatever was open inside <body>, so the new chapter
            # continues in the same frame or table
            for htag, hattrs, hblock in tags_to_keep[2:]:
                self.opentag(htag, hattrs, hblock)
        return ODF2XHTML.s_text_h(self, tag, attrs)

    def e_text_h(self, tag, attrs):
        """ Headings end
            Records a NavpointEntry for the heading before the base class
            finishes the element.
        """
        level = int(attrs[(TEXTNS,'outline-level')])
        if level > 6: level = 6 # Heading levels go only to 6 in XHTML
        if level < 1: level = 1
        lev = self.headinglevels[1:level+1]
        outline = '.'.join(map(str, lev))
        heading = ''.join(self.data)
        anchor = self.get_anchor("%s.%s" % (outline, heading))
        # len(self.chapters) is the number of the chapter being written now
        n = NavpointEntry(anchor, heading, len(self.chapters), level)
        self.navpoint_list.append(n)
        return ODF2XHTML.e_text_h(self, tag, attrs)

    def s_office_text(self, tag, attrs):
        """ Save all the lines up to and including the <body> tag
            so I can split the file into more files
        """
        ODF2XHTML.s_office_text(self, tag, attrs)
        self.headerpart = self.lines
        self.lines = []

    def e_office_document_content(self, tag, attrs):
        """ End of the document: store what is left as the last chapter """
        ODF2XHTML.e_office_document_content(self, tag, attrs)
        self.chapters.append(''.join(self.headerpart + self.lines))
        self.lines = []
class EPublication:
    """ Converts an OpenDocument file into an EPUB archive.

        The document is loaded and converted to chapters when the object is
        created; save() then writes the archive. The name of the input file
        is also used as the identifier of the book (dc:identifier in
        content.opf and dtb:uid in toc.ncx).
    """
    mimetype = "application/epub+zip"
    coverimage = None
    coverhtml = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Cover</title>
<style type="text/css"> img { max-width: 100%; } </style>
</head>
<body>
<div id="cover-image">
<img src="Pictures/cover.jpg" alt="Cover image"/>
</div>
</body>
</html>"""
    container = """<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>"""
    content_opf_head = """<?xml version="1.0"?>
<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="BookID" version="2.0">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
<dc:title>%s</dc:title>
<dc:language>%s</dc:language>
<dc:identifier id="BookID" opf:scheme="URI">%s</dc:identifier>
<dc:creator>%s</dc:creator>
%s
</metadata>
<manifest>
<item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>
<item id="styles-css" href="styles.css" media-type="text/css"/>"""
    toc_ncx_head = """<?xml version="1.0"?>
<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN"
"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">
<head>
<meta name="dtb:uid" content="%s"/>
<meta name="dtb:depth" content="2"/>
<meta name="dtb:totalPageCount" content="0"/>
<meta name="dtb:maxPageNumber" content="0"/>
</head>
<docTitle>
<text>%s</text>
</docTitle>
<navMap>
<navPoint id="navPoint-1" playOrder="1">
<navLabel>
<text>Start</text>
</navLabel>
<content src="chapter0.xhtml"/>
</navPoint>"""
    toc_ncx_foot = """ </navMap>
</ncx>"""

    def __init__(self, filename, coverimage):
        """ Load `filename` and convert it to chapters.

            filename   -- OpenDocument file to convert
            coverimage -- path of a cover image to include, or None
        """
        # Kept on the instance so that writing the archive does not depend
        # on module-level variables of the calling script.
        self.filename = filename
        self.doc = load(filename)
        self.coverimage = coverimage
        self.odhandler = ODF2EPUB(True, False)
        self.odhandler.add_style_file("styles.css")
        self.odhandler.load(self.doc)

    def save(self, outputfile):
        """ Save the document under the filename; '-' means standard output """
        if outputfile == '-':
            outputfp = zipfile.ZipFile(sys.stdout, "w")
        else:
            outputfp = zipfile.ZipFile(outputfile, "w")
        try:
            self._zipwrite(outputfp)
        finally:
            # Close the archive even when writing fails
            outputfp.close()

    def _writeentry(self, outputfp, arcname, data, now, compress_type=zipfile.ZIP_DEFLATED):
        """ Add `data` to the archive as `arcname` with the standard permissions """
        zout = zipfile.ZipInfo(arcname, now)
        zout.compress_type = compress_type
        zout.external_attr = UNIXPERMS
        outputfp.writestr(zout, data)

    def _content_opf(self):
        """ Return the text of content.opf: metadata, manifest, spine and guide """
        nchapters = len(self.odhandler.chapters)
        if self.coverimage:
            covermeta = """<meta name="cover" content="cover-image"/>"""
        else:
            covermeta = ""
        opf = []
        opf.append(self.content_opf_head % (escaped(self.odhandler.title), escaped(self.odhandler.language),
            escaped(self.filename), escaped(self.odhandler.creator), covermeta))
        if self.coverimage:
            opf.append(""" <item id="cover-page" href="cover.xhtml" media-type="application/xhtml+xml"/>""")
            opf.append(""" <item id="cover-image" href="Pictures/cover.jpg" media-type="image/jpeg"/>""")
        for chapter in range(nchapters):
            opf.append(""" <item id="chapter%d.xhtml" href="chapter%d.xhtml" media-type="application/xhtml+xml"/>""" % (chapter, chapter))
        # Write manifest of images.
        for arcname, picturerec in self.doc.Pictures.items():
            what_it_is, fileobj, mediatype = picturerec
            opf.append(""" <item id="%s" href="%s" media-type="%s"/>""" % (arcname.replace('/','_'), arcname, mediatype))
        opf.append("""</manifest>""")
        opf.append("""<spine toc="ncx">""")
        if self.coverimage:
            opf.append(""" <itemref idref="cover-page" linear="no"/>""")
        for chapter in range(nchapters):
            opf.append(""" <itemref idref="chapter%d.xhtml"/>""" % chapter)
        opf.append("""</spine>""")
        if self.coverimage:
            opf.append("""<guide>""")
            opf.append(""" <reference type="cover" title="Cover" href="cover.xhtml"/>""")
            opf.append("""</guide>""")
        opf.append('</package>')
        return '\n'.join(opf)

    def _toc_ncx(self):
        """ Return the text of toc.ncx, the navigation map of the headings """
        ncx = []
        ncx.append(self.toc_ncx_head % (escaped(self.filename), escaped(self.odhandler.title)))
        # We basically force the first navpoint to be a level 1 heading, no
        # matter what it was in reality. Then all other headings are either level 1 or 2.
        # The effective level is kept in a local variable, so the entries of
        # the handler are left untouched.
        inx = 2 # navPoint-1 is the fixed "Start" entry of the head
        prev_level = 1
        for entry in self.odhandler.navpoint_list:
            if inx == 2:
                level = 1
            else:
                level = min(entry.level, 2)
                # Close the previous navPoint unless the new one nests in it
                if level <= prev_level:
                    ncx.append(""" </navPoint>""")
                # Going from level 2 back to 1 also closes the level 1 parent
                if level < prev_level:
                    ncx.append(""" </navPoint>""")
            ncx.append(""" <navPoint id="navPoint-%d" playOrder="%d"> <!-- L%d -->
<navLabel>
<text>%s</text>
</navLabel>
<content src="chapter%d.xhtml#%s"/>
""" % (inx, inx, level, escaped(entry.title), entry.chapter, entry.anchor))
            inx += 1
            prev_level = level
        # Only close navPoints that were opened: a document without headings
        # has none, and an unmatched end tag would make the file malformed.
        if inx > 2:
            ncx.append(""" </navPoint>""")
            if prev_level > 1:
                ncx.append(""" </navPoint>""")
        ncx.append(self.toc_ncx_foot)
        return '\n'.join(ncx)

    def _zipwrite(self, outputfp):
        """ Write the document to an open file pointer

            outputfp -- zipfile.ZipFile opened for writing
        """
        now = time.localtime()[:6]
        # Write mimetype - uncompressed
        self._writeentry(outputfp, 'mimetype', self.mimetype, now, zipfile.ZIP_STORED)
        # Write META-INF/container.xml
        self._writeentry(outputfp, 'META-INF/container.xml', self.container, now)
        # Write CSS part
        self._writeentry(outputfp, 'OEBPS/styles.css', self.odhandler.css(), now)
        # Write HTML parts
        for chapter, content in enumerate(self.odhandler.chapters):
            xhtml = content.encode('us-ascii','xmlcharrefreplace')
            self._writeentry(outputfp, 'OEBPS/chapter%d.xhtml' % chapter, xhtml, now)
        # Copy images over to output
        for arcname, picturerec in self.doc.Pictures.items():
            what_it_is, fileobj, mediatype = picturerec
            self._writeentry(outputfp, "OEBPS/" + str(arcname), fileobj, now, zipfile.ZIP_STORED)
        # Write content.opf
        self._writeentry(outputfp, 'OEBPS/content.opf', self._content_opf(), now)
        # Write toc.ncx
        self._writeentry(outputfp, 'OEBPS/toc.ncx', self._toc_ncx(), now)
        # Write cover image
        if self.coverimage:
            outputfp.write(self.coverimage, 'OEBPS/Pictures/cover.jpg', zipfile.ZIP_STORED)
            self._writeentry(outputfp, 'OEBPS/cover.xhtml', self.coverhtml, now)
# Command-line entry point: parse the options, convert the input file and
# report the outcome through the exit status
# (0 = success, 1 = conversion failed, 2 = wrong usage).
try:
    # A long option that takes a value must end with "=", otherwise getopt
    # does not accept a value for it.
    opts, args = getopt.getopt(sys.argv[1:], "c:po:", ["cover=", "plain", "output="])
except getopt.GetoptError:
    usage()
    sys.exit(2)

generatecss = True
embedable = False
outputfilename = "-"
coverimage = None

for o, a in opts:
    if o in ("-c", "--cover"):
        coverimage = a
    elif o in ("-p", "--plain"):
        # NOTE(review): generatecss is recorded here, but nothing below
        # reads it, so -p currently has no effect.
        generatecss = False
    elif o in ("-o", "--output"):
        outputfilename = a

if len(args) != 1:
    usage()
    sys.exit(2)

try:
    epub = EPublication(args[0], coverimage)
    epub.save(outputfilename)
except Exception:
    # "except Exception" lets SystemExit and KeyboardInterrupt through.
    # The message names the input file; sys.argv[1] may be an option.
    sys.stderr.write("Unable to open file %s or file is not OpenDocument\n" % args[0])
    sys.exit(1)
sys.exit(0)