This repository has been archived on 2023-02-21. You can view files and clone it, but cannot push or open issues or pull requests.
tabellioOOo/ods2xmls/ods2xmls.py

134 lines
4.5 KiB
Python

#! /usr/bin/env python
# -*- coding: UTF-8 -*-
# TabellioOOo - OpenDocument Spreadsheet to XML data files
# Copyright (C) 2007-2010 Parlement de la Communauté française de Belgique
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
from cStringIO import StringIO
from optparse import OptionParser
try:
import xml.etree.ElementTree as ET
except ImportError:
import elementtree.ElementTree as ET
import zipfile
import sys
import os
# XML namespaces used to build the qualified ({namespace}tag) element and
# attribute names looked up in the spreadsheet's content.xml.
OFFICE_NS = 'urn:oasis:names:tc:opendocument:xmlns:office:1.0'
TABLE_NS = 'urn:oasis:names:tc:opendocument:xmlns:table:1.0'
# NOTE(review): XML_NS is not referenced in the code shown here; presumably
# the namespace of the generated documents -- confirm before removing.
XML_NS = 'http://www.pcf.be/namespaces/private/wordaddin/structs'
def get_cell_text(cell):
    '''
    Return the stripped, concatenated text of a cell's direct children

    Only the ``text`` of each direct child element is used; text held in
    deeper descendants or in element tails is not included.  Children
    without any text (for instance an empty paragraph) contribute nothing
    instead of raising a TypeError.
    '''
    # Iterate over the element itself rather than calling getchildren(),
    # which was deprecated and then removed in Python 3.9.
    return ''.join([child.text or '' for child in cell]).strip()
# Sheet name -> (root element name, row element name, default child values).
# A root element name of None means there is no wrapping list element: the
# first exported row becomes the document root itself.
_TABLE_SPECS = {
    'Parls': ('SParlSpeaker-list', 'SParlSpeaker', {'classname': 'PARL'}),
    'Ministres': ('SMinistreSpeaker-list', 'SMinistreSpeaker',
                  {'classname': 'MINISTRE'}),
    'PresComs': ('SPresComSpeaker-list', 'SPresComSpeaker', {}),
    'President': (None, 'SParlSpeaker', {}),
    'Commissions': ('MCOMSInfo-list', 'MCOMSInfo', {}),
}


def _read_row(row, col_attrs, default_attrs):
    '''
    Map the cells of one table row onto the header names

    Returns a copy of default_attrs extended with one entry per header
    name in col_attrs for which the row holds a cell.  A cell carrying a
    number-columns-repeated attribute fills that many consecutive columns
    with the same text.  Cells beyond the last header name are ignored.
    '''
    attrs = default_attrs.copy()
    i = 0
    for cell in row.findall('{%s}table-cell' % TABLE_NS):
        if i >= len(col_attrs):
            # no header name left to store further cells under
            break
        repeated = cell.attrib.get('{%s}number-columns-repeated' % TABLE_NS)
        if repeated:
            count = max(int(repeated), 0)
        else:
            count = 1
        text = get_cell_text(cell)
        # Slicing bounds the work to the known columns, whatever the
        # repeat count is (padding cells can be repeated many times).
        for name in col_attrs[i:i + count]:
            attrs[name] = text
        i += count
    return attrs


def convert(input_filename, output_directory):
    '''
    Convert a spreadsheet file to several XML documents

    input_filename is an OpenDocument spreadsheet (a zip archive holding
    a content.xml member).  For each sheet whose name is one of Parls,
    Ministres, PresComs, President or Commissions, a file named
    <sheet name>.xml is written to output_directory; other sheets are
    ignored.

    The first row of a sheet gives the element names (empty header cells
    are skipped); every following row whose first column is not empty
    becomes one element with a child element per column.

    Raises ValueError if the archive has no content.xml member.
    '''
    z = zipfile.ZipFile(input_filename)
    try:
        if 'content.xml' not in z.namelist():
            raise ValueError('%s has no content.xml member' % input_filename)
        content = z.read('content.xml')
    finally:
        # try/finally rather than "with": keeps the archive closed on every
        # path while staying usable on the old interpreters this script
        # still supports (see the elementtree import fallback).
        z.close()

    content_tree = ET.ElementTree(ET.fromstring(content))
    for table in content_tree.findall('{%s}body/{%s}spreadsheet/{%s}table' % (
            OFFICE_NS, OFFICE_NS, TABLE_NS)):
        table_name = table.attrib.get('{%s}name' % TABLE_NS)
        spec = _TABLE_SPECS.get(table_name)
        if spec is None:
            continue
        top_node_name, child_node_name, default_attrs = spec
        if top_node_name is None:
            top_node = None
        else:
            top_node = ET.Element(top_node_name)

        rows = table.findall('{%s}table-row' % TABLE_NS)
        col_attrs = []
        if rows:
            for cell in rows[0].findall('{%s}table-cell' % TABLE_NS):
                text = get_cell_text(cell)
                if text:
                    col_attrs.append(text)
        if not col_attrs:
            # Without header names no row can be exported; treat the sheet
            # like one that only has a header row.
            rows = []

        for row in rows[1:]:
            attrs = _read_row(row, col_attrs, default_attrs)
            if not attrs.get(col_attrs[0]):
                # rows with an empty first column are not exported
                continue
            if top_node is not None:
                node = ET.SubElement(top_node, child_node_name)
            else:
                # No list wrapper: this row becomes the root.  Any later
                # row is then attached beneath it (behaviour kept as is;
                # presumably such sheets hold a single row -- TODO confirm).
                node = ET.Element(child_node_name)
                top_node = node
            for key, value in attrs.items():
                ET.SubElement(node, key).text = value

        if top_node is None:
            continue
        # Binary mode: ElementTree.write() emits encoded bytes.
        out = open(os.path.join(output_directory, '%s.xml' % table_name), 'wb')
        try:
            ET.ElementTree(top_node).write(out)
        finally:
            out.close()
def main(argv=None):
    '''
    Command line entry point

    Expects two positional arguments: the input spreadsheet and the
    directory the XML files are written to.  argv is the list of
    arguments to parse; when None, the process command line is used.
    Exits with a usage message if fewer than two arguments are given;
    extra arguments are ignored.
    '''
    parser = OptionParser(usage='%prog INPUT.ods OUTPUT_DIRECTORY')
    options, args = parser.parse_args(argv)
    if len(args) < 2:
        # parser.error() prints the usage text and exits with status 2
        parser.error('expected an input spreadsheet and an output directory')
    convert(args[0], args[1])


if __name__ == '__main__':
    main()