detect merged cells, resolve #25

This commit is contained in:
chfw 2017-12-08 22:44:34 +00:00
parent 13ccfa7e09
commit 4cea637cce
8 changed files with 62 additions and 5 deletions

View File

@ -6,6 +6,8 @@
{%block description%}
**pyexcel-{{file_type}}** is a tiny wrapper library to read, manipulate and write data in {{file_type}} format and it can read xlsx and xlsm formats. You are likely to use it with `pyexcel <https://github.com/pyexcel/pyexcel>`_.
:fire: New flag: `detect_merged_cells` allows you to spread the same value among all merged cells. But be aware that this may slow down its reading performance.
New flag: `skip_hidden_row_and_column` allows you to skip hidden rows and columns and is defaulted to **True**. It may slow down its reading performance. And it is only valid for 'xls' files. For 'xlsx' files, please use pyexcel-xlsx.
{%endblock%}

View File

@ -1,6 +1,15 @@
Change log
================================================================================
0.5.5 - unreleased
--------------------------------------------------------------------------------
Added
********************************************************************************
#. `#25 <https://github.com/pyexcel/pyexcel-xls/issues/25>`_, detect merged
cell in .xls
0.5.4 - 2.11.2017
--------------------------------------------------------------------------------

View File

@ -17,6 +17,8 @@ pyexcel-xls - Let you focus on data, instead of xls format
**pyexcel-xls** is a tiny wrapper library to read, manipulate and write data in xls format and it can read xlsx and xlsm formats. You are likely to use it with `pyexcel <https://github.com/pyexcel/pyexcel>`_.
:fire: New flag: `detect_merged_cells` allows you to spread the same value among all merged cells. But be aware that this may slow down its reading performance.
New flag: `skip_hidden_row_and_column` allows you to skip hidden rows and columns and is defaulted to **True**. It may slow down its reading performance. And it is only valid for 'xls' files. For 'xlsx' files, please use pyexcel-xlsx.

View File

@ -1,8 +1,8 @@
overrides: "pyexcel.yaml"
name: "pyexcel-xls"
nick_name: xls
version: 0.5.4
current_version: 0.5.4
version: 0.5.5
current_version: 0.5.5
release: 0.5.4
file_type: xls
dependencies:

View File

@ -12,7 +12,7 @@ import xlrd
from pyexcel_io.book import BookReader
from pyexcel_io.sheet import SheetReader
from pyexcel_io._compact import OrderedDict
from pyexcel_io._compact import OrderedDict, irange
from pyexcel_io.service import has_no_digits_in_float
@ -23,6 +23,21 @@ XLS_KEYWORDS = [
]
class MergedCell(object):
    """
    One merged range of a sheet, shared by every cell position it covers.

    The instance keeps the four bounds it was created with and a ``value``
    slot that starts as ``None``. The sheet reader's ``cell_value`` stores
    the first value it reads for the range in that slot and returns it for
    later reads of the same range.
    """

    def __init__(self, row_low, row_high, column_low, column_high):
        # Bounds are stored exactly as given; how the upper bounds are
        # interpreted is decided by irange() in register_cells().
        self.__rl, self.__rh = row_low, row_high
        self.__cl, self.__ch = column_low, column_high
        self.value = None

    def register_cells(self, registry):
        """
        Map every covered position to this instance inside ``registry``.

        Keys are strings of the form ``"<row>-<column>"``.

        NOTE(review): irange is assumed to behave like range/xrange, i.e.
        the upper bounds are excluded - confirm against pyexcel_io._compact.
        """
        for row_index in irange(self.__rl, self.__rh):
            for column_index in irange(self.__cl, self.__ch):
                registry["%s-%s" % (row_index, column_index)] = self
class XLSheet(SheetReader):
"""
xls, xlsx, xlsm sheet reader
@ -34,6 +49,11 @@ class XLSheet(SheetReader):
self.__auto_detect_int = auto_detect_int
self.__hidden_cols = []
self.__hidden_rows = []
self.__merged_cells = {}
if keywords.get('detect_merged_cells') is True:
for merged_cell_ranges in sheet.merged_cells:
merged_cells = MergedCell(*merged_cell_ranges)
merged_cells.register_cells(self.__merged_cells)
if keywords.get('skip_hidden_row_and_column') is True:
for col_index, info in self._native_sheet.colinfo_map.items():
if info.hidden == 1:
@ -62,14 +82,23 @@ class XLSheet(SheetReader):
"""
Random access to the xls cells
"""
row, column = self._offset_hidden_indices(row, column)
if self._keywords.get('skip_hidden_row_and_column') is True:
row, column = self._offset_hidden_indices(row, column)
cell_type = self._native_sheet.cell_type(row, column)
value = self._native_sheet.cell_value(row, column)
if cell_type == xlrd.XL_CELL_DATE:
value = xldate_to_python_date(value)
elif cell_type == xlrd.XL_CELL_NUMBER and self.__auto_detect_int:
if has_no_digits_in_float(value):
value = int(value)
if self.__merged_cells:
merged_cell = self.__merged_cells.get("%s-%s" % (row, column))
if merged_cell:
if merged_cell.value:
value = merged_cell.value
else:
merged_cell.value = value
return value
def _offset_hidden_indices(self, row, column):
@ -97,6 +126,7 @@ class XLSBook(BookReader):
self._file_content = None
self.__skip_hidden_sheets = True
self.__skip_hidden_row_column = True
self.__detect_merged_cells = False
def open(self, file_name, **keywords):
self.__parse_keywords(**keywords)
@ -115,6 +145,7 @@ class XLSBook(BookReader):
self.__skip_hidden_sheets = keywords.get('skip_hidden_sheets', True)
self.__skip_hidden_row_column = keywords.get(
'skip_hidden_row_and_column', True)
self.__detect_merged_cells = keywords.get('detect_merged_cells', False)
def close(self):
if self._native_book:
@ -163,6 +194,8 @@ class XLSBook(BookReader):
raise IOError("No valid file name or file content found.")
if self.__skip_hidden_row_column and self._file_type == 'xls':
xlrd_params['formatting_info'] = True
if self.__detect_merged_cells:
xlrd_params['formatting_info'] = True
xls_book = xlrd.open_workbook(**xlrd_params)
return xls_book

View File

@ -9,7 +9,7 @@ PY26 = PY2 and sys.version_info[1] < 7
NAME = 'pyexcel-xls'
AUTHOR = 'C.W.'
VERSION = '0.5.4'
VERSION = '0.5.5'
EMAIL = 'wangc_2011@hotmail.com'
LICENSE = 'New BSD'
DESCRIPTION = (

BIN
tests/fixtures/merged-cell-sheet.xls vendored Executable file

Binary file not shown.

View File

@ -0,0 +1,11 @@
import os
from pyexcel_xls import get_data
from nose.tools import eq_
def test_merged_cells():
    """
    Read the merged-cell fixture with ``detect_merged_cells=True`` and
    compare ``Sheet1`` with the expected rows.

    The expected data repeats 1 down the first column of the first three
    rows and 11 across the last two columns of the final row - presumably
    the merged regions in the fixture.
    """
    fixture_path = os.path.join("tests", "fixtures", "merged-cell-sheet.xls")
    book = get_data(
        fixture_path,
        detect_merged_cells=True,
        library="pyexcel-xls",
    )
    expected = [
        [1, 2, 3],
        [1, 5, 6],
        [1, 8, 9],
        [10, 11, 11],
    ]
    eq_(book['Sheet1'], expected)