134 lines
4.5 KiB
Python
134 lines
4.5 KiB
Python
#! /usr/bin/env python
|
|
# -*- coding: UTF-8 -*-
|
|
|
|
# TabellioOOo - OpenDocument Spreadsheet to XML data files
|
|
# Copyright (C) 2007-2010 Parlement de la Communauté française de Belgique
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
from cStringIO import StringIO
|
|
from optparse import OptionParser
|
|
try:
|
|
import xml.etree.ElementTree as ET
|
|
except ImportError:
|
|
import elementtree.ElementTree as ET
|
|
|
|
import zipfile
|
|
import sys
|
|
import os
|
|
|
|
OFFICE_NS = 'urn:oasis:names:tc:opendocument:xmlns:office:1.0'
|
|
TABLE_NS = 'urn:oasis:names:tc:opendocument:xmlns:table:1.0'
|
|
|
|
XML_NS = 'http://www.pcf.be/namespaces/private/wordaddin/structs'
|
|
|
|
|
|
def get_cell_text(cell):
    '''
    Return the text of a table cell, stripped of surrounding whitespace

    The text of every direct child element of ``cell`` is concatenated in
    document order.  A child without any text (its ``text`` attribute is
    None, e.g. an empty paragraph) contributes an empty string instead of
    raising a TypeError.  Only ``child.text`` is read: text nested deeper
    than the direct children and tail text are not included.
    '''
    # Iterate over the element itself instead of calling getchildren(),
    # which was removed from ElementTree in Python 3.9.
    return ''.join([child.text or '' for child in cell]).strip()
|
|
|
|
# Layout of the XML document produced for each recognised sheet, keyed by
# sheet name: (tag of the enclosing list element or None, tag of the element
# created for each row, fields added to every row before the cell values).
_TABLE_LAYOUTS = {
    'Parls': ('SParlSpeaker-list', 'SParlSpeaker', {'classname': 'PARL'}),
    'Ministres': ('SMinistreSpeaker-list', 'SMinistreSpeaker',
                  {'classname': 'MINISTRE'}),
    'PresComs': ('SPresComSpeaker-list', 'SPresComSpeaker', {}),
    'President': (None, 'SParlSpeaker', {}),
    'Commissions': ('MCOMSInfo-list', 'MCOMSInfo', {}),
}


def _read_content_xml(input_filename):
    '''
    Return the raw ``content.xml`` member of the zip archive

    Raises ValueError when the archive has no ``content.xml`` member.
    '''
    archive = zipfile.ZipFile(input_filename)
    # try/finally rather than a with-statement: this file still carries a
    # fallback import for interpreters that predate xml.etree.
    try:
        if 'content.xml' not in archive.namelist():
            raise ValueError(
                '%s does not contain a content.xml member' % input_filename)
        return archive.read('content.xml')
    finally:
        archive.close()


def _header_names(header_row):
    '''
    Return the non-empty cell texts of the header row, in order
    '''
    # NOTE(review): empty header cells are dropped rather than kept as
    # placeholders, so a blank header in the middle of the row shifts the
    # following names one column to the left -- confirm this is intended.
    names = []
    for cell in header_row.findall('{%s}table-cell' % TABLE_NS):
        text = get_cell_text(cell)
        if text:
            names.append(text)
    return names


def _row_values(row, col_attrs, default_attrs):
    '''
    Return a dict mapping column names to the cell texts of ``row``

    The dict starts as a copy of ``default_attrs``.  A cell carrying a
    ``number-columns-repeated`` attribute fills that many consecutive
    columns with its text; cells beyond the last named column are ignored.
    '''
    values = default_attrs.copy()
    column = 0
    for cell in row.findall('{%s}table-cell' % TABLE_NS):
        if column >= len(col_attrs):
            # Every named column is filled; what is left is usually one
            # cell repeated over all the unused trailing columns.
            break
        repeated = cell.attrib.get('{%s}number-columns-repeated' % TABLE_NS)
        span = 1
        if repeated:
            span = int(repeated)
        if span < 1:
            # A repeat count of zero (or less) covers no column at all.
            continue
        # Read the text once per cell, not once per repeated column.
        text = get_cell_text(cell)
        for name in col_attrs[column:column + span]:
            values[name] = text
        column += span
    return values


def convert(input_filename, output_directory):
    '''
    Convert a spreadsheet file to several XML documents

    ``input_filename`` is a zip archive whose ``content.xml`` member holds
    the spreadsheet.  For each sheet named Parls, Ministres, PresComs,
    President or Commissions a file ``<sheet name>.xml`` is written to
    ``output_directory``; other sheets are ignored.

    The first row of a sheet gives the element names of the fields, each
    following row becomes one element.  Rows whose first named column is
    empty are skipped.  A sheet without rows, or without any column name,
    is treated like a sheet that has no data rows.

    Raises ValueError when the archive has no ``content.xml`` member.
    '''
    content_tree = ET.ElementTree(
        ET.fromstring(_read_content_xml(input_filename)))
    table_path = '{%s}body/{%s}spreadsheet/{%s}table' % (
        OFFICE_NS, OFFICE_NS, TABLE_NS)

    for table in content_tree.findall(table_path):
        table_name = table.attrib.get('{%s}name' % TABLE_NS)
        layout = _TABLE_LAYOUTS.get(table_name)
        if layout is None:
            continue
        list_tag, child_node_name, default_attrs = layout

        top_node = None
        if list_tag is not None:
            top_node = ET.Element(list_tag)

        rows = table.findall('{%s}table-row' % TABLE_NS)
        col_attrs = []
        if rows:
            col_attrs = _header_names(rows[0])

        # Without any column name there is no key column, so no row can
        # qualify for output.
        data_rows = []
        if col_attrs:
            data_rows = rows[1:]

        for row in data_rows:
            attrs = _row_values(row, col_attrs, default_attrs)
            if not attrs.get(col_attrs[0]):
                continue

            if top_node is not None:
                node = ET.SubElement(top_node, child_node_name)
            else:
                # Sheet without a list element (President): the first
                # non-empty row becomes the document root; any further
                # non-empty row is then appended below that root.
                node = ET.Element(child_node_name)
                top_node = node

            for key, value in attrs.items():
                ET.SubElement(node, key).text = value

        if top_node is None:
            continue

        # ElementTree.write() emits encoded bytes, hence the binary mode;
        # open() replaces the file() builtin that Python 3 no longer has.
        out = open(os.path.join(output_directory, '%s.xml' % table_name),
                   'wb')
        try:
            ET.ElementTree(top_node).write(out)
        finally:
            out.close()
|
|
|
|
|
|
def main():
    '''
    Command line entry point: convert a spreadsheet to XML data files

    Expects two positional arguments, the input spreadsheet file and the
    directory the XML files are written to; further arguments are ignored.
    When fewer than two are given, a usage error is reported through the
    option parser (message on stderr, exit status 2) instead of failing
    with an IndexError traceback.
    '''
    parser = OptionParser(usage='%prog INPUT_FILE OUTPUT_DIRECTORY')
    options, args = parser.parse_args()
    if len(args) < 2:
        parser.error('expected an input file and an output directory')
    convert(args[0], args[1])
|
|
|
|
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
|
|
|