230 lines
8.8 KiB
Python
Executable File
230 lines
8.8 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
# Copyright (C) 2008 Agustin Henze -> agustinhenze at gmail.com
|
|
#
|
|
# This is free software. You may redistribute it under the terms
|
|
# of the Apache license and the GNU General Public License Version
|
|
# 2 or at your option any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public
|
|
# License along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
#
|
|
# Contributor(s):
|
|
#
|
|
# Søren Roug
|
|
#
|
|
# Oct 2014: Georges Khaznadar <georgesk@debian.org>
|
|
# - ported to Python3
|
|
# - imlemented the missing switch -c / --encoding, with an extra
|
|
# feature for POSIX platforms which can guess encoding.
|
|
|
|
from odf.opendocument import OpenDocumentSpreadsheet
|
|
from odf.style import Style, TextProperties, ParagraphProperties, TableColumnProperties
|
|
from odf.text import P
|
|
from odf.table import Table, TableColumn, TableRow, TableCell
|
|
from optparse import OptionParser
|
|
import sys,csv,re, os, codecs
|
|
|
|
if sys.version_info[0]==3: unicode=str
|
|
|
|
if sys.version_info[0]==2:
|
|
class UTF8Recoder:
|
|
"""
|
|
Iterator that reads an encoded stream and reencodes the input to UTF-8
|
|
"""
|
|
def __init__(self, f, encoding):
|
|
self.reader = codecs.getreader(encoding)(f)
|
|
|
|
def __iter__(self):
|
|
return self
|
|
|
|
def next(self):
|
|
return self.reader.next().encode("utf-8")
|
|
|
|
class UnicodeReader:
|
|
"""
|
|
A CSV reader which will iterate over lines in the CSV file "f",
|
|
which is encoded in the given encoding.
|
|
"""
|
|
|
|
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
|
|
f = UTF8Recoder(f, encoding)
|
|
self.reader = csv.reader(f, dialect=dialect, **kwds)
|
|
|
|
def next(self):
|
|
row = self.reader.next()
|
|
return [unicode(s, "utf-8") for s in row]
|
|
|
|
def __iter__(self):
|
|
return self
|
|
|
|
|
|
def csvToOds( pathFileCSV, pathFileODS, tableName='table',
|
|
delimiter=',', quoting=csv.QUOTE_MINIMAL,
|
|
quotechar = '"', escapechar = None,
|
|
skipinitialspace = False, lineterminator = '\r\n',
|
|
encoding="utf-8"):
|
|
textdoc = OpenDocumentSpreadsheet()
|
|
# Create a style for the table content. One we can modify
|
|
# later in the word processor.
|
|
tablecontents = Style(name="Table Contents", family="paragraph")
|
|
tablecontents.addElement(ParagraphProperties(numberlines="false", linenumber="0"))
|
|
tablecontents.addElement(TextProperties(fontweight="bold"))
|
|
textdoc.styles.addElement(tablecontents)
|
|
|
|
# Start the table
|
|
table = Table( name=tableName )
|
|
|
|
if sys.version_info[0]==3:
|
|
reader = csv.reader(open(pathFileCSV, encoding=encoding),
|
|
delimiter=delimiter,
|
|
quoting=quoting,
|
|
quotechar=quotechar,
|
|
escapechar=escapechar,
|
|
skipinitialspace=skipinitialspace,
|
|
lineterminator=lineterminator)
|
|
else:
|
|
reader = UnicodeReader(open(pathFileCSV),
|
|
encoding=encoding,
|
|
delimiter=delimiter,
|
|
quoting=quoting,
|
|
quotechar=quotechar,
|
|
escapechar=escapechar,
|
|
skipinitialspace=skipinitialspace,
|
|
lineterminator=lineterminator)
|
|
fltExp = re.compile('^\s*[-+]?\d+(\.\d+)?\s*$')
|
|
|
|
for row in reader:
|
|
tr = TableRow()
|
|
table.addElement(tr)
|
|
for val in row:
|
|
if fltExp.match(val):
|
|
tc = TableCell(valuetype="float", value=val.strip())
|
|
else:
|
|
tc = TableCell(valuetype="string")
|
|
tr.addElement(tc)
|
|
p = P(stylename=tablecontents,text=val)
|
|
tc.addElement(p)
|
|
|
|
textdoc.spreadsheet.addElement(table)
|
|
textdoc.save( pathFileODS )
|
|
|
|
if __name__ == "__main__":
|
|
usage = "%prog -i file.csv -o file.ods -d"
|
|
parser = OptionParser(usage=usage, version="%prog 0.1")
|
|
parser.add_option('-i','--input', action='store',
|
|
dest='input', help='File input in csv')
|
|
parser.add_option('-o','--output', action='store',
|
|
dest='output', help='File output in ods')
|
|
parser.add_option('-d','--delimiter', action='store',
|
|
dest='delimiter', help='specifies a one-character string to use as the field separator. It defaults to ",".')
|
|
|
|
parser.add_option('-c','--encoding', action='store',
|
|
dest='encoding', help='specifies the encoding the file csv. It defaults to utf-8')
|
|
|
|
parser.add_option('-t','--table', action='store',
|
|
dest='tableName', help='The table name in the output file')
|
|
|
|
parser.add_option('-s','--skipinitialspace',
|
|
dest='skipinitialspace', help='''specifies how to interpret whitespace which
|
|
immediately follows a delimiter. It defaults to False, which
|
|
means that whitespace immediately following a delimiter is part
|
|
of the following field.''')
|
|
|
|
parser.add_option('-l','--lineterminator', action='store',
|
|
dest='lineterminator', help='''specifies the character sequence which should
|
|
terminate rows.''')
|
|
|
|
parser.add_option('-q','--quoting', action='store',
|
|
dest='quoting', help='''It can take on any of the following module constants:
|
|
0 = QUOTE_MINIMAL means only when required, for example, when a field contains either the quotechar or the delimiter
|
|
1 = QUOTE_ALL means that quotes are always placed around fields.
|
|
2 = QUOTE_NONNUMERIC means that quotes are always placed around fields which do not parse as integers or floating point numbers.
|
|
3 = QUOTE_NONE means that quotes are never placed around fields.
|
|
It defaults is QUOTE_MINIMAL''')
|
|
|
|
parser.add_option('-e','--escapechar', action='store',
|
|
dest='escapechar', help='''specifies a one-character string used to escape the delimiter when quoting is set to QUOTE_NONE.''')
|
|
|
|
parser.add_option('-r','--quotechar', action='store',
|
|
dest='quotechar', help='''specifies a one-character string to use as the quoting character. It defaults to ".''')
|
|
|
|
(options, args) = parser.parse_args()
|
|
|
|
if options.input:
|
|
pathFileCSV = options.input
|
|
else:
|
|
parser.print_help()
|
|
exit( 0 )
|
|
|
|
if options.output:
|
|
pathFileODS = options.output
|
|
else:
|
|
parser.print_help()
|
|
exit( 0 )
|
|
|
|
if options.delimiter:
|
|
delimiter = options.delimiter
|
|
else:
|
|
delimiter = ","
|
|
|
|
if options.skipinitialspace:
|
|
skipinitialspace = True
|
|
else:
|
|
skipinitialspace=False
|
|
|
|
if options.lineterminator:
|
|
lineterminator = options.lineterminator
|
|
else:
|
|
lineterminator ="\r\n"
|
|
|
|
if options.escapechar:
|
|
escapechar = options.escapechar
|
|
else:
|
|
escapechar=None
|
|
|
|
if options.tableName:
|
|
tableName = options.tableName
|
|
else:
|
|
tableName = "table"
|
|
|
|
if options.quotechar:
|
|
quotechar = options.quotechar
|
|
else:
|
|
quotechar = "\""
|
|
|
|
encoding = "utf-8" # default setting
|
|
###########################################################
|
|
## try to guess the encoding; this is implemented only with
|
|
## POSIX platforms. Can it be improved?
|
|
output = os.popen('/usr/bin/file ' + pathFileCSV).read()
|
|
m=re.match(r'^.*: ([-a-zA-Z0-9]+) text$', output)
|
|
if m:
|
|
encoding=m.group(1)
|
|
if 'ISO-8859' in encoding:
|
|
encoding="latin-1"
|
|
else:
|
|
encoding="utf-8"
|
|
############################################################
|
|
# when the -c or --coding switch is used, it takes precedence
|
|
if options.encoding:
|
|
encoding = options.encoding
|
|
|
|
csvToOds( pathFileCSV=unicode(pathFileCSV),
|
|
pathFileODS=unicode(pathFileODS),
|
|
delimiter=delimiter, skipinitialspace=skipinitialspace,
|
|
escapechar=escapechar,
|
|
lineterminator=unicode(lineterminator),
|
|
tableName=tableName, quotechar=quotechar,
|
|
encoding=encoding)
|
|
|
|
# Local Variables: ***
|
|
# mode: python ***
|
|
# End: ***
|