debian-python-odf/odfmeta/odfmeta

267 lines
8.2 KiB
Python
Executable File

#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
#
# This is free software. You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
# 2 or at your option any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Contributor(s):
#
import zipfile, time, sys, getopt, re
import xml.sax, xml.sax.saxutils
from odf.namespaces import TOOLSVERSION, OFFICENS, XLINKNS, DCNS, METANS
from io import BytesIO
OUTENCODING="utf-8"
whitespace = re.compile(r'\s+')
fields = {
'title': (DCNS,u'title'),
'description': (DCNS,u'description'),
'subject': (DCNS,u'subject'),
'creator': (DCNS,u'creator'),
'date': (DCNS,u'date'),
'language': (DCNS,u'language'),
'generator': (METANS,u'generator'),
'initial-creator': (METANS,u'initial-creator'),
'keyword': (METANS,u'keyword'),
'editing-duration': (METANS,u'editing-duration'),
'editing-cycles': (METANS,u'editing-cycles'),
'printed-by': (METANS,u'printed-by'),
'print-date': (METANS,u'print-date'),
'creation-date': (METANS,u'creation-date'),
'user-defined': (METANS,u'user-defined'),
#'template': (METANS,u'template'),
}
xfields = []
Xfields = []
addfields = {}
deletefields = {}
yieldfields = {}
showversion = None
def exitwithusage(exitcode=2):
""" print out usage information """
sys.stderr.write("Usage: %s [-cdlvV] [-xXaAI metafield]... [-o output] [inputfile]\n" % sys.argv[0])
sys.stderr.write("\tInputfile must be OpenDocument format\n")
sys.exit(exitcode)
def normalize(str):
"""
The normalize-space function returns the argument string with whitespace
normalized by stripping leading and trailing whitespace and replacing
sequences of whitespace characters by a single space.
"""
return whitespace.sub(' ', str).strip()
class MetaCollector:
"""
The MetaCollector is a pseudo file object, that can temporarily ignore write-calls
It could probably be replaced with a StringIO object.
"""
def __init__(self):
self._content = []
self.dowrite = True
def write(self, str):
if self.dowrite:
self._content.append(str)
def content(self):
return ''.join(self._content)
base = xml.sax.saxutils.XMLGenerator
class odfmetaparser(base):
""" Parse a meta.xml file with an event-driven parser and replace elements.
It would probably be a cleaner approach to use a DOM based parser and
then manipulate in memory.
Small issue: Reorders elements
"""
version = 'Unknown'
def __init__(self):
self._mimetype = ''
self.output = MetaCollector()
self._data = []
self.seenfields = {}
base.__init__(self, self.output, OUTENCODING)
def startElementNS(self, name, qname, attrs):
self._data = []
field = name
# I can't modify the template until the tool replaces elements at the same
# location and not at the end
# if name == (METANS,u'template'):
# self._data = [attrs.get((XLINKNS,u'title'),'')]
if showversion and name == (OFFICENS,u'document-meta'):
if showversion == '-V':
print ("version:%s" % attrs.get((OFFICENS,u'version'),'Unknown').decode('utf-8'))
else:
print ("%s" % attrs.get((OFFICENS,u'version'),'Unknown').decode('utf-8'))
if name == (METANS,u'user-defined'):
field = attrs.get((METANS,u'name'))
if field in deletefields:
self.output.dowrite = False
elif field in yieldfields:
del addfields[field]
base.startElementNS(self, name, qname, attrs)
else:
base.startElementNS(self, name, qname, attrs)
self._tag = field
def endElementNS(self, name, qname):
field = name
if name == (METANS,u'user-defined'):
field = self._tag
if name == (OFFICENS,u'meta'):
for k,v in addfields.items():
if len(v) > 0:
if type(k) == type(''):
base.startElementNS(self,(METANS,u'user-defined'),None,{(METANS,u'name'):k})
base.characters(self, v)
base.endElementNS(self, (METANS,u'user-defined'),None)
else:
base.startElementNS(self, k, None, {})
base.characters(self, v)
base.endElementNS(self, k, None)
if name in xfields:
print ("%s" % self.data())
if name in Xfields:
if isinstance(self._tag, tuple):
texttag = self._tag[1]
else:
texttag = self._tag
print ("%s:%s" % (texttag, self.data()))
if field in deletefields:
self.output.dowrite = True
else:
base.endElementNS(self, name, qname)
def characters(self, content):
base.characters(self, content)
self._data.append(content)
def meta(self):
return self.output.content()
def data(self):
if usenormalize:
return normalize(''.join(self._data))
else:
return ''.join(self._data)
now = time.localtime()[:6]
outputfile = "-"
writemeta = False # Do we change any meta data?
usenormalize = False
try:
opts, args = getopt.getopt(sys.argv[1:], "cdlvVI:A:a:o:x:X:")
except getopt.GetoptError:
exitwithusage()
if len(opts) == 0:
opts = [ ('-l','') ]
for o, a in opts:
if o in ('-a','-A','-I'):
writemeta = True
if a.find(":") >= 0:
k,v = a.split(":",1)
else:
k,v = (a, "")
if len(k) == 0:
exitwithusage()
k = fields.get(k,k)
addfields[k] = unicode(v,'utf-8')
if o == '-a':
yieldfields[k] = True
if o == '-I':
deletefields[k] = True
if o == '-d':
writemeta = True
addfields[(DCNS,u'date')] = "%04d-%02d-%02dT%02d:%02d:%02d" % now
deletefields[(DCNS,u'date')] = True
if o == '-c':
usenormalize = True
if o in ('-v', '-V'):
showversion = o
if o == '-l':
Xfields = fields.values()
if o == "-x":
xfields.append(fields.get(a,a))
if o == "-X":
Xfields.append(fields.get(a,a))
if o == "-o":
outputfile = a
# The specification says we should change the element to our own,
# and must not export the original identifier.
if writemeta:
addfields[(METANS,u'generator')] = TOOLSVERSION
deletefields[(METANS,u'generator')] = True
odfs = odfmetaparser()
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_namespaces, 1)
parser.setContentHandler(odfs)
if len(args) == 0:
zin = zipfile.ZipFile(sys.stdin,'r')
else:
if not zipfile.is_zipfile(args[0]):
exitwithusage()
zin = zipfile.ZipFile(args[0], 'r')
try:
content = zin.read('meta.xml').decode('utf-8')
except:
sys.stderr.write("File has no meta data\n")
sys.exit(1)
parser.parse(BytesIO(content.encode('utf-8')))
if writemeta:
if outputfile == '-':
if sys.stdout.isatty():
sys.stderr.write("Won't write ODF file to terminal\n")
sys.exit(1)
zout = zipfile.ZipFile(sys.stdout,"w")
else:
zout = zipfile.ZipFile(outputfile,"w")
# Loop through the input zipfile and copy the content to the output until we
# get to the meta.xml. Then substitute.
for zinfo in zin.infolist():
if zinfo.filename == "meta.xml":
# Write meta
zi = zipfile.ZipInfo("meta.xml", now)
zi.compress_type = zipfile.ZIP_DEFLATED
zout.writestr(zi,odfs.meta() )
else:
payload = zin.read(zinfo.filename)
zout.writestr(zinfo, payload)
zout.close()
zin.close()
# Local Variables: ***
# mode: python ***
# End: ***