288 lines
8.9 KiB
Python
288 lines
8.9 KiB
Python
# Copyright 2011 Marco Conti
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
# Thanks to grt for the fixes
|
|
import sys
|
|
import math
|
|
|
|
from odf.table import TableRow, TableCell, Table
|
|
from odf.text import P
|
|
from odf.namespaces import OFFICENS
|
|
from odf.opendocument import OpenDocumentSpreadsheet, load
|
|
|
|
from pyexcel_io.book import BookReader, BookWriter
|
|
from pyexcel_io.sheet import SheetReader, SheetWriter
|
|
|
|
import pyexcel_ods.converter as converter
|
|
|
|
# True when the interpreter's major version is 2.
PY2 = sys.version_info[0] == 2

# True only on Python 2 with a minor version below 7, where the standard
# library has no collections.OrderedDict.
PY27_BELOW = PY2 and sys.version_info[1] < 7

if not PY27_BELOW:
    from collections import OrderedDict
else:
    # Back-port package providing OrderedDict for old interpreters.
    from ordereddict import OrderedDict
|
|
|
|
|
|
class ODSSheet(SheetReader):
    """Reader for a single native ods sheet (an odfpy ``Table`` element).

    On construction every ``TableRow`` of the sheet is collected and the
    widest row is measured.  Rows that contain a cell carrying the
    ``numbercolumnsrepeated`` attribute are expanded once and kept in
    ``self.cached_rows`` (keyed by ``str(row_index)``) so later cell
    look-ups address the expanded columns.
    """

    def __init__(self, sheet, auto_detect_int=True, **keywords):
        """Wrap ``sheet`` and pre-compute its dimensions.

        :param sheet: native table element, forwarded to ``SheetReader``
        :param auto_detect_int: when true, float cells holding a whole
            number are returned as ``int``
        :param keywords: forwarded unchanged to ``SheetReader.__init__``
        """
        SheetReader.__init__(self, sheet, **keywords)
        self.auto_detect_int = auto_detect_int
        # NOTE(review): self.native_sheet is presumably set by
        # SheetReader.__init__ from ``sheet`` -- confirm against pyexcel_io.
        self.rows = self.native_sheet.getElementsByType(TableRow)
        self.cached_rows = {}
        self._number_of_rows = len(self.rows)
        # Also fills self.cached_rows as a side effect.
        self._number_of_columns = self._find_columns()

    def number_of_rows(self):
        """Return the number of ``TableRow`` elements in the sheet."""
        return self._number_of_rows

    def number_of_columns(self):
        """Return the width of the widest row (repeats expanded)."""
        return self._number_of_columns

    @property
    def name(self):
        """The ``name`` attribute of the native sheet."""
        return self.native_sheet.getAttribute("name")

    def _cell_value(self, row, column):
        """Return the value at (``row``, ``column``) or ``None``.

        ``None`` is returned when ``column`` lies beyond the end of the
        row.  The column count is the maximum over all rows, so shorter
        rows -- including cached, repeat-expanded ones -- are legitimately
        asked for columns they do not have.
        """
        row_cache = self.cached_rows.get(str(row))
        if row_cache is not None:
            # Expanded rows can be shorter than the widest row; treat the
            # missing columns exactly like the uncached path does.
            try:
                return row_cache[column]
            except IndexError:
                return None

        # Only touch the native row when there is no cached expansion.
        cells = self.rows[row].getElementsByType(TableCell)
        try:
            return self._read_cell(cells[column])
        except IndexError:
            return None

    def _read_row(self, cells):
        """Return the values of ``cells`` with column repeats expanded."""
        tmp_row = []
        for cell in cells:
            # A set "numbercolumnsrepeated" attribute means the same value
            # occupies that many consecutive columns.
            repeat = cell.getAttribute("numbercolumnsrepeated")
            cell_value = self._read_cell(cell)
            if repeat:
                tmp_row.extend([cell_value] * int(repeat))
            else:
                tmp_row.append(cell_value)
        return tmp_row

    def _read_text_cell(self, cell):
        """Return the text of ``cell``.

        Every text node that is a direct child of one of the cell's ``P``
        elements becomes one entry; the entries are joined with newlines.
        """
        text_content = []
        for paragraph in cell.getElementsByType(P):
            for node in paragraph.childNodes:
                # nodeType 3 is the DOM text-node type.
                if node.nodeType != 3:
                    continue
                if PY2:
                    text_content.append(unicode(node.data))
                else:
                    text_content.append(node.data)
        return '\n'.join(text_content)

    def _read_cell(self, cell):
        """Convert a native cell into a Python value.

        String cells, and cells whose value type has no entry in
        ``converter.VALUE_CONVERTERS``, are read as text.  Other cells are
        converted from the attribute named by ``converter.VALUE_TOKEN``
        (default ``"value"``).  Whole-number floats become ``int`` when
        ``auto_detect_int`` is set.
        """
        cell_type = cell.getAttrNS(OFFICENS, "value-type")
        if cell_type == "string":
            return self._read_text_cell(cell)
        if cell_type not in converter.VALUE_CONVERTERS:
            return self._read_text_cell(cell)

        value_token = converter.VALUE_TOKEN.get(cell_type, "value")
        value = cell.getAttrNS(OFFICENS, value_token)
        n_value = converter.VALUE_CONVERTERS[cell_type](value)
        if cell_type == 'float' and self.auto_detect_int:
            if is_integer_ok_for_xl_float(n_value):
                n_value = int(n_value)
        return n_value

    def _find_columns(self):
        """Return the width of the widest row and cache expanded rows.

        Rows containing repeated columns are expanded via ``_read_row``
        and stored in ``self.cached_rows``.  The result stays ``-1`` when
        the sheet has no rows (existing behaviour, kept for callers).
        """
        widest = -1
        for row_index, row in enumerate(self.rows):
            cells = row.getElementsByType(TableCell)
            if self._check_for_column_repeat(cells):
                row_cache = self._read_row(cells)
                self.cached_rows[str(row_index)] = row_cache
                length = len(row_cache)
            else:
                length = len(cells)
            if length > widest:
                widest = length
        return widest

    def _check_for_column_repeat(self, cells):
        """Return True if any cell has ``numbercolumnsrepeated`` set."""
        for cell in cells:
            if cell.getAttribute("numbercolumnsrepeated"):
                return True
        return False
|
|
|
|
|
|
class ODSBook(BookReader):
    """Read an ods book through odfpy.

    NOTE(review): ``self.file_name``, ``self.file_stream`` and
    ``self.keywords`` are presumably set by ``BookReader.open`` /
    ``BookReader.open_stream`` -- confirm against pyexcel_io.
    """

    def open(self, file_name, **keywords):
        """Open an ods file by name and load it."""
        BookReader.open(self, file_name, **keywords)
        self._load_from_file()

    def open_stream(self, file_stream, **keywords):
        """Open an ods file stream and load it."""
        BookReader.open_stream(self, file_stream, **keywords)
        self._load_from_memory()

    def read_sheet_by_name(self, sheet_name):
        """Read the first sheet whose name equals ``sheet_name``.

        :returns: whatever ``read_sheet`` returns for that table
        :raises ValueError: if no sheet has that name
        """
        tables = self.native_book.spreadsheet.getElementsByType(Table)
        # Stop at the first match instead of collecting every match.
        for table in tables:
            if table.getAttribute('name') == sheet_name:
                return self.read_sheet(table)
        raise ValueError("%s cannot be found" % sheet_name)

    def read_sheet_by_index(self, sheet_index):
        """Read the sheet at position ``sheet_index``.

        :returns: whatever ``read_sheet`` returns for that table
        :raises IndexError: if ``sheet_index`` is not below the number of
            sheets
        """
        tables = self.native_book.spreadsheet.getElementsByType(Table)
        length = len(tables)
        if sheet_index < length:
            return self.read_sheet(tables[sheet_index])
        raise IndexError("Index %d out of bound %d" % (
            sheet_index, length))

    def read_all(self):
        """Read all sheets.

        :returns: an ``OrderedDict`` mapping each sheet name to the result
            of that sheet's ``to_array()``, in document order
        """
        result = OrderedDict()
        for sheet in self.native_book.spreadsheet.getElementsByType(Table):
            ods_sheet = ODSSheet(sheet, **self.keywords)
            result[ods_sheet.name] = ods_sheet.to_array()
        return result

    def read_sheet(self, native_sheet):
        """Read one native sheet.

        :returns: a one-entry dict ``{sheet name: sheet.to_array()}``
        """
        sheet = ODSSheet(native_sheet, **self.keywords)
        return {sheet.name: sheet.to_array()}

    def _load_from_memory(self):
        """Load the native book from ``self.file_stream``."""
        self.native_book = load(self.file_stream)

    def _load_from_file(self):
        """Load the native book from ``self.file_name``."""
        self.native_book = load(self.file_name)
|
|
|
|
|
|
class ODSSheetWriter(SheetWriter):
    """
    ODS sheet writer: builds one odfpy ``Table`` row by row
    """

    def set_sheet_name(self, name):
        """Create the native table, named ``name``."""
        self.native_sheet = Table(name=name)

    def set_size(self, size):
        """No-op here; the hook is used in ods3."""
        pass

    def write_cell(self, row, cell):
        """Append ``cell`` to the native ``row`` as a ``TableCell``.

        The ODF value type is looked up from the exact Python type of
        ``cell`` and falls back to ``"string"``.  String cells get one
        paragraph per line of text.  Other cells store the value in the
        attribute named by ``converter.VALUE_TOKEN`` plus one paragraph.
        """
        native_cell = TableCell()
        odf_type = converter.ODS_WRITE_FORMAT_COVERSION.get(
            type(cell), "string")
        native_cell.setAttrNS(OFFICENS, "value-type", odf_type)
        value_token = converter.VALUE_TOKEN.get(odf_type, "value")
        to_odf = converter.ODS_VALUE_CONVERTERS.get(odf_type, None)
        if to_odf:
            cell = to_odf(cell)

        if odf_type == 'string':
            # Each line of a multi-line string becomes its own paragraph.
            for line in cell.split('\n'):
                native_cell.addElement(P(text=line))
        else:
            native_cell.setAttrNS(OFFICENS, value_token, cell)
            native_cell.addElement(P(text=cell))
        row.addElement(native_cell)

    def write_row(self, array):
        """
        Add a new row to the sheet and write each item of ``array`` into it
        """
        new_row = TableRow()
        # The row is attached to the table first, then filled.
        self.native_sheet.addElement(new_row)
        for item in array:
            self.write_cell(new_row, item)

    def close(self):
        """
        Attach the finished table to the book's spreadsheet element
        """
        # NOTE(review): self.native_book is presumably stored by the
        # SheetWriter constructor -- confirm against pyexcel_io.
        self.native_book.spreadsheet.addElement(self.native_sheet)
|
|
|
|
|
|
class ODSWriter(BookWriter):
    """
    Open document spreadsheet writer backed by an odfpy
    ``OpenDocumentSpreadsheet``
    """

    def __init__(self):
        """Initialise the base writer and create an empty native book."""
        BookWriter.__init__(self)
        self.native_book = OpenDocumentSpreadsheet()

    def create_sheet(self, name):
        """
        Return an ``ODSSheetWriter`` for a sheet called ``name``
        """
        sheet_writer = ODSSheetWriter(self.native_book, None, name)
        return sheet_writer

    def close(self):
        """
        Write the native book to ``self.file_alike_object``
        """
        # NOTE(review): file_alike_object is presumably set by BookWriter
        # when the target is opened -- confirm against pyexcel_io.
        target = self.file_alike_object
        self.native_book.write(target)
|
|
|
|
|
|
def is_integer_ok_for_xl_float(value):
    """Check whether a float has no fractional part.

    :param value: the number to test
    :returns: ``True`` when ``value`` equals its floor, ``False``
        otherwise.  Infinities and NaN return ``False``: they cannot be
        converted with ``int()``, and on Python 3 ``math.floor`` raises
        for them.
    """
    if math.isinf(value) or math.isnan(value):
        return False
    return value == math.floor(value)
|
|
|
|
|
|
# Plugin description for the ods format: which classes read and write it
# and how its streams are typed.
_ods_registry = dict(
    file_type="ods",
    reader=ODSBook,
    writer=ODSWriter,
    stream_type="binary",
    mime_type="application/vnd.oasis.opendocument.spreadsheet",
    library="pyexcel-ods",
)

# One-element tuple holding the registry above.
exports = (_ods_registry,)
|