351 lines
13 KiB
Python
Executable File
351 lines
13 KiB
Python
Executable File
#!/usr/bin/python
|
|
# -*- coding: utf-8 -*-
|
|
# Copyright (C) 2010 Søren Roug, European Environment Agency
|
|
#
|
|
# This is free software. You may redistribute it under the terms
|
|
# of the Apache license and the GNU General Public License Version
|
|
# 2 or at your option any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public
|
|
# License along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
#
|
|
# Contributor(s):
|
|
#
|
|
from odf.odf2xhtml import ODF2XHTML
|
|
from odf.namespaces import TEXTNS, XLINKNS
|
|
from odf.opendocument import load
|
|
import sys, getopt, time, zipfile
|
|
from StringIO import StringIO
|
|
from cgi import escape
|
|
|
|
# ZipInfo.external_attr value used for every archive member written below:
# the Unix mode bits for a regular file with -rw-r--r-- permissions, shifted
# into the upper 16 bits of the attribute word.
# Written with the 0o prefix (and without the `L` suffix) so the literal is
# accepted by both Python 2.6+ and Python 3; the value is unchanged.
UNIXPERMS = 0o100644 << 16  # -rw-r--r--
|
|
|
|
def escaped(string):
    """ Return `string` made safe for inclusion in XML, as pure ASCII.

        The characters &, <, > and the double quote are replaced by entity
        references, and every non-ASCII character is replaced by a numeric
        character reference.  The double quote is escaped because callers in
        this file interpolate the result into double-quoted attribute values
        as well as into element content.

        Returns the encoded (us-ascii) form of the escaped text.
    """
    # Local import: xml.sax.saxutils is available on Python 2 and 3 alike,
    # unlike cgi.escape which no longer exists in current Python versions.
    from xml.sax.saxutils import escape as _xml_escape

    text = _xml_escape(string, {'"': '&quot;'})
    return text.encode('us-ascii', 'xmlcharrefreplace')
|
|
|
|
def usage():
    """ Print a one-line command synopsis to standard error. """
    program = sys.argv[0]
    synopsis = "Usage: %s [-c cover-image] [-o output-file] [-p] inputfile\n" % program
    sys.stderr.write(synopsis)
|
|
|
|
class NavpointEntry:
    """ Plain record describing one heading for the navigation map.

        anchor  -- anchor name of the heading inside its chapter file
        title   -- text of the heading
        chapter -- number of the chapter file that contains the heading
        level   -- heading level
    """

    def __init__(self, anchor, title, chapter, level):
        self.anchor, self.title = anchor, title
        self.chapter, self.level = chapter, level
|
|
|
|
class ODF2EPUB(ODF2XHTML):
    """ ODF2XHTML variant that splits its output into chapters.

        A new chapter is started at every level 1 heading.  Finished chapters
        are collected as strings in `self.chapters`, and every heading is
        recorded as a NavpointEntry in `self.navpoint_list`.
    """

    in_toc = False
    cur_html_name = 'chapter0.xhtml'

    def __init__(self, generate_css=True, embedable=False):
        """ Set up the converter and register the table-of-content handlers. """
        ODF2XHTML.__init__(self, generate_css, embedable)
        # These lists are mutated while parsing, so each converter needs its
        # own.  As class attributes they would be shared by every instance and
        # a second conversion would inherit the first one's chapters.
        self.navpoint_list = []
        self.chapters = []
        self.headerpart = []
        self.elements[(TEXTNS, "table-of-content")] = (
            self.s_text_table_of_content, self.e_text_table_of_content)

    def s_text_table_of_content(self, tag, attrs):
        """ Entering the document's table of contents """
        self.in_toc = True

    def e_text_table_of_content(self, tag, attrs):
        """ Leaving the document's table of contents """
        self.in_toc = False

    def s_text_h(self, tag, attrs):
        """ Handle a heading
            If the heading is a level 1 heading, then split the HTML file.
            We have to be careful, because the heading can be inside a frame or a table.
        """
        level = int(attrs[(TEXTNS, 'outline-level')])
        if level == 1:
            # Work on a snapshot of the open-tag stack, so the loops below are
            # not affected by whatever closetag()/opentag() do to the stack.
            open_tags = self.htmlstack[:]
            for htag, hattrs, hblock in reversed(open_tags):
                if htag == 'body':
                    # Footnotes are emitted before the body of the chapter
                    # is closed, then reset for the next chapter.
                    self.generate_footnotes()
                    self._resetfootnotes()
                self.closetag(htag)
            # I have to do this rather ugly, as the saved header part doesn't
            # go through the self.opentag() method
            self.chapters.append(''.join(self.headerpart + self.lines))
            self.lines = []
            self.htmlstack = open_tags[:2]  # Only <html> and <body>
            # Reopen everything that was nested deeper than <body>.
            for htag, hattrs, hblock in open_tags[2:]:
                self.opentag(htag, hattrs, hblock)
        return ODF2XHTML.s_text_h(self, tag, attrs)

    def e_text_h(self, tag, attrs):
        """ Headings end
            Record the heading as a navigation point in the current chapter.
        """
        level = int(attrs[(TEXTNS, 'outline-level')])
        # Heading levels go only from 1 to 6 in XHTML
        level = max(1, min(level, 6))
        lev = self.headinglevels[1:level + 1]
        outline = '.'.join(map(str, lev))
        heading = ''.join(self.data)
        anchor = self.get_anchor("%s.%s" % (outline, heading))
        # len(self.chapters) is the number of chapters already completed,
        # which is the number of the chapter currently being written.
        entry = NavpointEntry(anchor, heading, len(self.chapters), level)
        self.navpoint_list.append(entry)
        return ODF2XHTML.e_text_h(self, tag, attrs)

    def s_office_text(self, tag, attrs):
        """ Save all the lines up to and including the <body> tag
            so I can split the file into more files
        """
        ODF2XHTML.s_office_text(self, tag, attrs)
        self.headerpart = self.lines
        self.lines = []

    def e_office_document_content(self, tag, attrs):
        """ End of the document: store what is left as the last chapter """
        ODF2XHTML.e_office_document_content(self, tag, attrs)
        self.chapters.append(''.join(self.headerpart + self.lines))
        self.lines = []
|
|
|
|
class EPublication:
    """ Converts one OpenDocument text file into an EPUB archive.

        The document is loaded and converted to chapters when the object is
        created; save() then writes the archive.
    """

    mimetype = "application/epub+zip"
    coverimage = None

    coverhtml = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Cover</title>
<style type="text/css"> img { max-width: 100%; } </style>
</head>
<body>
<div id="cover-image">
<img src="Pictures/cover.jpg" alt="Cover image"/>
</div>
</body>
</html>"""

    container = """<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>"""

    # Filled with: title, language, identifier, creator, extra metadata
    content_opf_head = """<?xml version="1.0"?>
<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="BookID" version="2.0">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
<dc:title>%s</dc:title>
<dc:language>%s</dc:language>
<dc:identifier id="BookID" opf:scheme="URI">%s</dc:identifier>
<dc:creator>%s</dc:creator>
%s
</metadata>
<manifest>
<item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>
<item id="styles-css" href="styles.css" media-type="text/css"/>"""

    # Filled with: identifier, title.  Already contains navPoint number 1.
    toc_ncx_head = """<?xml version="1.0"?>
<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN"
"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">

<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">
<head>
<meta name="dtb:uid" content="%s"/>
<meta name="dtb:depth" content="2"/>
<meta name="dtb:totalPageCount" content="0"/>
<meta name="dtb:maxPageNumber" content="0"/>
</head>
<docTitle>
<text>%s</text>
</docTitle>
<navMap>
<navPoint id="navPoint-1" playOrder="1">
<navLabel>
<text>Start</text>
</navLabel>
<content src="chapter0.xhtml"/>
</navPoint>"""
    toc_ncx_foot = """ </navMap>
</ncx>"""

    def __init__(self, filename, coverimage):
        """ Load and convert the document.

            filename   -- the OpenDocument file to read; it is also used as
                          the identifier of the publication
            coverimage -- file name of a cover image, or None for no cover
        """
        # Remembered so that writing the archive does not depend on the
        # command line variables of the surrounding script.
        self.filename = filename
        self.doc = load(filename)
        self.coverimage = coverimage
        self.odhandler = ODF2EPUB(True, False)
        self.odhandler.add_style_file("styles.css")
        self.odhandler.load(self.doc)

    def save(self, outputfile):
        """ Save the document under the filename
            The name '-' means standard output.
        """
        if outputfile == '-':
            target = sys.stdout
        else:
            target = outputfile
        outputfp = zipfile.ZipFile(target, "w")
        try:
            self._zipwrite(outputfp)
        finally:
            # Close the archive even if writing one of the members failed
            outputfp.close()

    def _add_member(self, outputfp, arcname, data, now,
                    compress_type=zipfile.ZIP_DEFLATED):
        """ Write `data` to the archive as member `arcname` """
        zinfo = zipfile.ZipInfo(arcname, now)
        zinfo.compress_type = compress_type
        zinfo.external_attr = UNIXPERMS
        outputfp.writestr(zinfo, data)

    def _content_opf(self):
        """ Return the text of content.opf: metadata, manifest, spine, guide """
        chapter_numbers = range(len(self.odhandler.chapters))
        if self.coverimage:
            covermeta = """<meta name="cover" content="cover-image"/>"""
        else:
            covermeta = ""
        opf = [self.content_opf_head % (
            escaped(self.odhandler.title), escaped(self.odhandler.language),
            escaped(self.filename), escaped(self.odhandler.creator), covermeta)]

        # Manifest
        if self.coverimage:
            opf.append(""" <item id="cover-page" href="cover.xhtml" media-type="application/xhtml+xml"/>""")
            opf.append(""" <item id="cover-image" href="Pictures/cover.jpg" media-type="image/jpeg"/>""")
        for chapter in chapter_numbers:
            opf.append(""" <item id="chapter%d.xhtml" href="chapter%d.xhtml" media-type="application/xhtml+xml"/>""" % (chapter, chapter))
        # Write manifest of images.
        for arcname, picturerec in self.doc.Pictures.items():
            mediatype = picturerec[2]
            opf.append(""" <item id="%s" href="%s" media-type="%s"/>""" % (arcname.replace('/', '_'), arcname, mediatype))
        opf.append("""</manifest>""")

        # Spine
        opf.append("""<spine toc="ncx">""")
        if self.coverimage:
            opf.append(""" <itemref idref="cover-page" linear="no"/>""")
        for chapter in chapter_numbers:
            opf.append(""" <itemref idref="chapter%d.xhtml"/>""" % chapter)
        opf.append("""</spine>""")

        # Guide
        if self.coverimage:
            opf.append("""<guide>""")
            opf.append(""" <reference type="cover" title="Cover" href="cover.xhtml"/>""")
            opf.append("""</guide>""")

        opf.append('</package>')
        return '\n'.join(opf)

    def _toc_ncx(self):
        """ Return the text of toc.ncx, built from the recorded headings """
        navpoints = self.odhandler.navpoint_list
        ncx = [self.toc_ncx_head % (escaped(self.filename), escaped(self.odhandler.title))]
        # We basically force the first navpoint to be a level 1 heading, no
        # matter what it was in reality. Then all other headings are either level 1 or 2.
        prev_level = 1
        for offset, np in enumerate(navpoints):
            if offset == 0:
                level = 1
            else:
                level = min(np.level, 2)
                if level <= prev_level:
                    # Same or higher level: the previous navPoint is finished
                    ncx.append(""" </navPoint>""")
                if level < prev_level:
                    # Back from level 2 to 1: also finish the enclosing one
                    ncx.append(""" </navPoint>""")
            # Numbering starts at 2, as number 1 is the "Start" navPoint
            # that is part of toc_ncx_head.
            number = offset + 2
            ncx.append(""" <navPoint id="navPoint-%d" playOrder="%d"> <!-- L%d -->
<navLabel>
<text>%s</text>
</navLabel>
<content src="chapter%d.xhtml#%s"/>
""" % (number, number, level, escaped(np.title), np.chapter, np.anchor))
            prev_level = level
        # Close what is still open.  Nothing is open if the document had no
        # headings; emitting an end tag then would make the file malformed.
        if navpoints:
            ncx.append(""" </navPoint>""")
            if prev_level > 1:
                ncx.append(""" </navPoint>""")
        ncx.append(self.toc_ncx_foot)
        return '\n'.join(ncx)

    def _zipwrite(self, outputfp):
        """ Write the document to an open zipfile.ZipFile
            The members are written in a fixed order, the mimetype first.
        """
        now = time.localtime()[:6]
        # NOTE(review): the value returned here was never used by the original
        # code either.  The call is kept in case xhtml() has side effects;
        # confirm against ODF2XHTML and remove it if it has none.
        self.odhandler.xhtml()

        # Write mimetype - uncompressed
        self._add_member(outputfp, 'mimetype', self.mimetype, now, zipfile.ZIP_STORED)

        # Write META-INF/container.xml
        self._add_member(outputfp, 'META-INF/container.xml', self.container, now)

        # Write CSS part
        self._add_member(outputfp, 'OEBPS/styles.css', self.odhandler.css(), now)

        # Write HTML parts
        for number, chapter in enumerate(self.odhandler.chapters):
            xhtml = chapter.encode('us-ascii', 'xmlcharrefreplace')
            self._add_member(outputfp, 'OEBPS/chapter%d.xhtml' % number, xhtml, now)

        # Copy images over to output - uncompressed
        for arcname, picturerec in self.doc.Pictures.items():
            fileobj = picturerec[1]
            self._add_member(outputfp, "OEBPS/" + str(arcname), fileobj, now, zipfile.ZIP_STORED)

        # Write content.opf
        self._add_member(outputfp, 'OEBPS/content.opf', self._content_opf(), now)

        # Write toc.ncx
        self._add_member(outputfp, 'OEBPS/toc.ncx', self._toc_ncx(), now)

        # Write cover image
        if self.coverimage:
            outputfp.write(self.coverimage, 'OEBPS/Pictures/cover.jpg', zipfile.ZIP_STORED)
            self._add_member(outputfp, 'OEBPS/cover.xhtml', self.coverhtml, now)
|
|
|
|
# Command line handling.  Exit status: 0 on success, 1 if the conversion
# failed, 2 if the command line was wrong.
try:
    # "cover=" and "output=" carry a trailing '=' because these long options
    # take an argument, like their short forms "c:" and "o:".
    opts, args = getopt.getopt(sys.argv[1:], "c:po:", ["cover=", "plain", "output="])
except getopt.GetoptError:
    usage()
    sys.exit(2)

# NOTE: generatecss (-p/--plain) and embedable are set here but are not
# passed on to EPublication below, so they currently have no effect.
generatecss = True
embedable = False
outputfilename = "-"  # "-" means standard output
coverimage = None

for o, a in opts:
    if o in ("-c", "--cover"):
        coverimage = a
    if o in ("-p", "--plain"):
        generatecss = False
    if o in ("-o", "--output"):
        outputfilename = a

# Exactly one input file is required
if len(args) != 1:
    usage()
    sys.exit(2)

try:
    epub = EPublication(args[0], coverimage)
    epub.save(outputfilename)
except Exception:
    # Not a bare "except:", so that Ctrl-C and sys.exit() are not reported as
    # a conversion failure.  The message names the input file; sys.argv[1]
    # could be an option such as "-c".
    sys.stderr.write("Unable to open file %s or file is not OpenDocument\n" % args[0])
    sys.exit(1)
sys.exit(0)
|
|
|