New style io (#36)

* 🎉 new style reader

* 🎉 new style writer and reader plugins

* This is an auto-commit, updating project meta data, such as changelog.rst, contributors.rst

* 💚 update moban update

* This is an auto-commit, updating project meta data, such as changelog.rst, contributors.rst

* 💚 use pyexcel-io dev branch

* 📚 update PR template

* 🐴 code refactoring

Co-authored-by: chfw <chfw@users.noreply.github.com>
This commit is contained in:
jaska 2020-09-29 22:26:05 +01:00 committed by GitHub
parent 25647ac365
commit d3b57b434b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 133 additions and 154 deletions

View File

@ -1,11 +1,9 @@
With your PR, here is a check list:
- [ ] Has Test cases written
- [ ] Has all code lines tested
- [ ] Has test cases written?
- [ ] Has all code lines tested?
- [ ] Has `make format` been run?
- [ ] Please update CHANGELOG.yml (not CHANGELOG.rst)
- [ ] Passes all Travis CI builds
- [ ] Has a fair amount of documentation if your change is complex
- [ ] run 'make format' so as to conform to the pyexcel organisation's coding style
- [ ] Please add yourself to 'contributors' section of pyexcel-xls.yml (if not found, please use CONTRIBUTORS.rst)
- [ ] Agree on NEW BSD License for your contribution

View File

@ -5,23 +5,25 @@ jobs:
runs-on: ubuntu-latest
name: synchronize templates via moban
steps:
- uses: actions/checkout@v2
with:
ref: ${{ github.head_ref }}
- name: Set up Python
uses: actions/setup-python@v1
with:
python-version: '3.7'
- name: check changes
run: |
pip install moban gitfs2 pypifs
moban
git status
git diff --exit-code
- name: Auto-commit
if: failure()
uses: docker://cdssnc/auto-commit-github-action
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
args: This is an auto-commit, updating project meta data, such as changelog.rst, contributors.rst
- uses: actions/checkout@v2
with:
ref: ${{ github.head_ref }}
- name: Set up Python
uses: actions/setup-python@v1
with:
python-version: '3.7'
- name: check changes
run: |
pip install moban gitfs2 pypifs moban-jinja2-github moban-ansible
moban
git status
git diff --exit-code
- name: Auto-commit
if: failure()
uses: docker://cdssnc/auto-commit-github-action
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
args: >-
This is an auto-commit, updating project meta data,
such as changelog.rst, contributors.rst

4
.gitignore vendored
View File

@ -540,3 +540,7 @@ cscope.files
cscope.out
cscope.in.out
cscope.po.out
# remove moban hash dictionary
.moban.hashes

View File

@ -4,4 +4,4 @@
In alphabetical order:
* `John Vandenberg <https://api.github.com/users/jayvdb>`_
* `John Vandenberg <https://github.com/jayvdb>`_

View File

@ -6,9 +6,6 @@ test: lint
install_test:
pip install -r tests/requirements.txt
git-diff-check:
git diff --exit-code
lint:
bash lint.sh

View File

@ -1,3 +1,2 @@
pip install flake8
flake8 . --exclude=.moban.d,docs,setup.py --builtins=unicode,xrange,long
python setup.py checkdocs
flake8 --exclude=.moban.d,docs,setup.py --builtins=unicode,xrange,long . && python setup.py checkdocs

View File

@ -4,7 +4,7 @@
The lower level xls/xlsx/xlsm file format handler using xlrd/xlwt
:copyright: (c) 2016-2017 by Onni Software Ltd
:copyright: (c) 2016-2020 by Onni Software Ltd
:license: New BSD License
"""
@ -15,15 +15,28 @@ from pyexcel_io.io import save_data as write_data
# this line has to be placed above all else
# because of the dynamic import
from pyexcel_io.plugins import IOPluginInfoChain
from pyexcel_io.plugins import IOPluginInfoChainV2
__FILE_TYPE__ = "xls"
IOPluginInfoChain(__name__).add_a_reader(
relative_plugin_class_path="xlsr.XLSBook",
IOPluginInfoChainV2(__name__).add_a_reader(
relative_plugin_class_path="xlsr.XLSInFile",
locations=["file"],
file_types=[__FILE_TYPE__, "xlsx", "xlsm"],
stream_type="binary",
).add_a_reader(
relative_plugin_class_path="xlsr.XLSInMemory",
locations=["memory"],
file_types=[__FILE_TYPE__, "xlsx", "xlsm"],
stream_type="binary",
).add_a_reader(
relative_plugin_class_path="xlsr.XLSInContent",
locations=["content"],
file_types=[__FILE_TYPE__, "xlsx", "xlsm"],
stream_type="binary",
).add_a_writer(
relative_plugin_class_path="xlsw.XLSWriter",
locations=["file", "memory"],
file_types=[__FILE_TYPE__],
stream_type="binary",
)

View File

@ -4,17 +4,16 @@
The lower level xls/xlsm file format handler using xlrd
:copyright: (c) 2016-2017 by Onni Software Ltd
:copyright: (c) 2016-2020 by Onni Software Ltd
:license: New BSD License
"""
import datetime
import xlrd
from pyexcel_io.book import BookReader
from pyexcel_io.sheet import SheetReader
from pyexcel_io.service import has_no_digits_in_float
from pyexcel_io._compact import OrderedDict, irange
from pyexcel_io._compact import irange
from pyexcel_io.plugin_api.abstract_sheet import ISheet
from pyexcel_io.plugin_api.abstract_reader import IReader
XLS_KEYWORDS = [
"filename",
@ -45,7 +44,7 @@ class MergedCell(object):
registry[key] = self
class XLSheet(SheetReader):
class XLSheet(ISheet):
"""
xls, xlsx, xlsm sheet reader
@ -53,12 +52,13 @@ class XLSheet(SheetReader):
"""
def __init__(self, sheet, auto_detect_int=True, date_mode=0, **keywords):
SheetReader.__init__(self, sheet, **keywords)
self.__auto_detect_int = auto_detect_int
self.__hidden_cols = []
self.__hidden_rows = []
self.__merged_cells = {}
self._book_date_mode = date_mode
self._native_sheet = sheet
self._keywords = keywords
if keywords.get("detect_merged_cells") is True:
for merged_cell_ranges in sheet.merged_cells:
merged_cells = MergedCell(*merged_cell_ranges)
@ -75,17 +75,14 @@ class XLSheet(SheetReader):
def name(self):
return self._native_sheet.name
def number_of_rows(self):
"""
Number of rows in the xls sheet
"""
return self._native_sheet.nrows - len(self.__hidden_rows)
def row_iterator(self):
number_of_rows = self._native_sheet.nrows - len(self.__hidden_rows)
return range(number_of_rows)
def number_of_columns(self):
"""
Number of columns in the xls sheet
"""
return self._native_sheet.ncols - len(self.__hidden_cols)
def column_iterator(self, row):
number_of_columns = self._native_sheet.ncols - len(self.__hidden_cols)
for column in range(number_of_columns):
yield self.cell_value(row, column)
def cell_value(self, row, column):
"""
@ -127,93 +124,48 @@ def calculate_offsets(incoming_index, hidden_indices):
return incoming_index + offset
class XLSBook(BookReader):
class XLSReader(IReader):
"""
XLSBook reader
It reads xls, xlsm, xlsx work book
"""
def __init__(self):
BookReader.__init__(self)
self._file_content = None
self.__skip_hidden_sheets = True
self.__skip_hidden_row_column = True
self.__detect_merged_cells = False
def open(self, file_name, **keywords):
self.__parse_keywords(**keywords)
BookReader.open(self, file_name, **keywords)
def open_stream(self, file_stream, **keywords):
self.__parse_keywords(**keywords)
BookReader.open_stream(self, file_stream, **keywords)
def open_content(self, file_content, **keywords):
self.__parse_keywords(**keywords)
self._keywords = keywords
self._file_content = file_content
def __parse_keywords(self, **keywords):
def __init__(self, file_type, **keywords):
self.__skip_hidden_sheets = keywords.get("skip_hidden_sheets", True)
self.__skip_hidden_row_column = keywords.get(
"skip_hidden_row_and_column", True
)
self.__detect_merged_cells = keywords.get("detect_merged_cells", False)
self._keywords = keywords
xlrd_params = self._extract_xlrd_params()
if self.__skip_hidden_row_column and file_type == "xls":
xlrd_params["formatting_info"] = True
if self.__detect_merged_cells:
xlrd_params["formatting_info"] = True
self.content_array = []
self._native_book = self.get_xls_book(**xlrd_params)
for sheet in self._native_book.sheets():
if self.__skip_hidden_sheets and sheet.visibility != 0:
continue
self.content_array.append(sheet)
def read_sheet(self, index):
native_sheet = self.content_array[index]
sheet = XLSheet(
native_sheet,
date_mode=self._native_book.datemode,
**self._keywords
)
return sheet
def close(self):
if self._native_book:
self._native_book.release_resources()
self._native_book = None
def read_sheet_by_index(self, sheet_index):
self._native_book = self._get_book(on_demand=True)
sheet = self._native_book.sheet_by_index(sheet_index)
return self.read_sheet(sheet)
def read_sheet_by_name(self, sheet_name):
self._native_book = self._get_book(on_demand=True)
try:
sheet = self._native_book.sheet_by_name(sheet_name)
except xlrd.XLRDError:
raise ValueError("%s cannot be found" % sheet_name)
return self.read_sheet(sheet)
def read_all(self):
result = OrderedDict()
self._native_book = self._get_book()
for sheet in self._native_book.sheets():
if self.__skip_hidden_sheets and sheet.visibility != 0:
continue
data_dict = self.read_sheet(sheet)
result.update(data_dict)
return result
def read_sheet(self, native_sheet):
sheet = XLSheet(
native_sheet,
date_mode=self._native_book.datemode,
**self._keywords
)
return {sheet.name: sheet.to_array()}
def _get_book(self, on_demand=False):
xlrd_params = self._extract_xlrd_params()
xlrd_params["on_demand"] = on_demand
if self._file_name:
xlrd_params["filename"] = self._file_name
elif self._file_stream:
file_content = self._file_stream.read()
xlrd_params["file_contents"] = file_content
elif self._file_content is not None:
xlrd_params["file_contents"] = self._file_content
else:
raise IOError("No valid file name or file content found.")
if self.__skip_hidden_row_column and self._file_type == "xls":
xlrd_params["formatting_info"] = True
if self.__detect_merged_cells:
xlrd_params["formatting_info"] = True
def get_xls_book(self, **xlrd_params):
xls_book = xlrd.open_workbook(**xlrd_params)
return xls_book
@ -226,6 +178,24 @@ class XLSBook(BookReader):
return params
class XLSInFile(XLSReader):
def __init__(self, file_name, file_type, **keywords):
super().__init__(file_type, filename=file_name, **keywords)
class XLSInContent(XLSReader):
def __init__(self, file_content, file_type, **keywords):
super().__init__(file_type, file_contents=file_content, **keywords)
class XLSInMemory(XLSReader):
def __init__(self, file_stream, file_type, **keywords):
file_stream.seek(0)
super().__init__(
file_type, file_contents=file_stream.read(), **keywords
)
def xldate_to_python_date(value, date_mode):
"""
convert xl date to python date

View File

@ -4,16 +4,15 @@
The lower level xls file format handler using xlwt
:copyright: (c) 2016-2017 by Onni Software Ltd
:copyright: (c) 2016-2020 by Onni Software Ltd
:license: New BSD License
"""
import datetime
import xlrd
from xlwt import XFStyle, Workbook
from pyexcel_io.book import BookWriter
from pyexcel_io.sheet import SheetWriter
from pyexcel_io.plugin_api.abstract_writer import IWriter
DEFAULT_DATE_FORMAT = "DD/MM/YY"
DEFAULT_TIME_FORMAT = "HH:MM:SS"
@ -27,8 +26,7 @@ class XLSheetWriter(SheetWriter):
"""
def set_sheet_name(self, name):
"""Create a sheet
"""
"""Create a sheet"""
self._native_sheet = self._native_book.add_sheet(name)
self.current_row = 0
@ -68,32 +66,33 @@ class XLSheetWriter(SheetWriter):
self.current_row += 1
class XLSWriter(BookWriter):
class XLSWriter(IWriter):
"""
xls writer
"""
def __init__(self):
BookWriter.__init__(self)
self.work_book = None
def open(
self, file_name, encoding="ascii", style_compression=2, **keywords
def __init__(
self,
file_alike_object,
_, # file_type not used
encoding="ascii",
style_compression=2,
**keywords
):
BookWriter.open(self, file_name, **keywords)
self._file_alike_object = file_alike_object
self.work_book = Workbook(
style_compression=style_compression, encoding=encoding
)
def write(self, incoming_dict):
if incoming_dict:
BookWriter.write(self, incoming_dict)
else:
raise NotImplementedError(EMPTY_SHEET_NOT_ALLOWED)
def create_sheet(self, name):
return XLSheetWriter(self.work_book, None, name)
def write(self, incoming_dict):
if incoming_dict:
IWriter.write(self, incoming_dict)
else:
raise NotImplementedError(EMPTY_SHEET_NOT_ALLOWED)
def close(self):
"""
This call actually saves the file

View File

@ -1,2 +1,2 @@
https://github.com/pyexcel/pyexcel-io/archive/master.zip
https://github.com/pyexcel/pyexcel-io/archive/dev.zip

View File

@ -6,6 +6,7 @@
import os
import datetime
from unittest.mock import MagicMock, patch
import pyexcel as pe
from _compact import OrderedDict
@ -13,7 +14,6 @@ from pyexcel_xls import save_data
from pyexcel_xls.xlsr import xldate_to_python_date
from pyexcel_xls.xlsw import XLSWriter as Writer
from mock import patch
from nose import SkipTest
from nose.tools import eq_, raises
@ -79,13 +79,11 @@ def test_issue_16_file_stream_has_no_getvalue():
@patch("xlrd.open_workbook")
def test_issue_18_encoding_override_isnt_passed(fake_open):
fake_open.return_value = None
fake_open.return_value = MagicMock(sheets=MagicMock(return_value=[]))
test_encoding = "utf-32"
from pyexcel_xls.xlsr import XLSBook
from pyexcel_xls.xlsr import XLSInFile
book = XLSBook()
book.open("fake_file.xls", encoding_override=test_encoding)
book._get_book()
XLSInFile("fake_file.xls", "xls", encoding_override=test_encoding)
keywords = fake_open.call_args[1]
assert keywords["encoding_override"] == test_encoding
@ -112,7 +110,7 @@ def test_empty_book_pyexcel_issue_120():
"""
https://github.com/pyexcel/pyexcel/issues/120
"""
writer = Writer()
writer = Writer("fake.xls", "xls")
writer.write({})

View File

@ -1,8 +1,9 @@
import os
from nose.tools import eq_
from pyexcel_io import get_data, save_data
from nose.tools import eq_
class TestFilter:
def setUp(self):

View File

@ -1,7 +1,7 @@
import os
from base import PyexcelWriterBase, PyexcelHatWriterBase
from pyexcel_xls.xlsr import XLSBook as Reader
from pyexcel_xls.xlsr import XLSReader as Reader
from pyexcel_xls.xlsw import XLSWriter as Writer
@ -13,12 +13,10 @@ class TestNativeXLSWriter:
"Sheet3": [[u"X", u"Y", u"Z"], [1, 4, 7], [2, 5, 8], [3, 6, 9]],
}
self.testfile = "writer.xls"
writer = Writer()
writer.open(self.testfile)
writer = Writer(self.testfile, "xls")
writer.write(self.content)
writer.close()
reader = Reader()
reader.open(self.testfile)
reader = Reader("xls", filename=self.testfile)
content = reader.read_all()
for key in content.keys():
content[key] = list(content[key])