New style io (#36)

* 🎉 new style reader
* 🎉 new style writer and reader plugins
* This is an auto-commit, updating project meta data, such as changelog.rst, contributors.rst
* 💚 update moban update
* This is an auto-commit, updating project meta data, such as changelog.rst, contributors.rst
* 💚 use pyexcel-io dev branch
* 📚 update PR template
* 🐴 code refactoring

Co-authored-by: chfw <chfw@users.noreply.github.com>
2020-09-29 22:26:05 +01:00 · 2020-09-29 22:26:05 +01:00 · d3b57b434b
parent 25647ac365
commit d3b57b434b
13 changed files with 133 additions and 154 deletions
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@ -1,11 +1,9 @@
 With your PR, here is a check list:

- [ ] Has Test cases written
- [ ] Has all code lines tested
+- [ ] Has test cases written?
+- [ ] Has all code lines tested?
 - [ ] Has `make format` been run?
 - [ ] Please update CHANGELOG.yml(not CHANGELOG.rst)
 - [ ] Passes all Travis CI builds
 - [ ] Has fair amount of documentation if your change is complex
- [ ] run 'make format' so as to confirm the pyexcel organisation's coding style
- [ ] Please add yourself to 'contributors' section of pyexcel-xls.yml (if not found, please use CONTRIBUTORS.rst)
 - [ ] Agree on NEW BSD License for your contribution
--- a/.github/workflows/moban-update.yml
+++ b/.github/workflows/moban-update.yml
@ -5,23 +5,25 @@ jobs:
    runs-on: ubuntu-latest
    name: synchronize templates via moban
    steps:
-    - uses: actions/checkout@v2
-      with:
-        ref: ${{ github.head_ref }}
-    - name: Set up Python
-      uses: actions/setup-python@v1
-      with:
-        python-version: '3.7'
-    - name: check changes
-      run: |
-        pip install moban gitfs2 pypifs
-        moban
-        git status
-        git diff --exit-code
-    - name: Auto-commit
-      if: failure()
-      uses: docker://cdssnc/auto-commit-github-action
-      env:
-        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      with:
-        args: This is an auto-commit, updating project meta data, such as changelog.rst, contributors.rst
+      - uses: actions/checkout@v2
+        with:
+          ref: ${{ github.head_ref }}
+      - name: Set up Python
+        uses: actions/setup-python@v1
+        with:
+          python-version: '3.7'
+      - name: check changes
+        run: |
+          pip install moban gitfs2 pypifs moban-jinja2-github moban-ansible
+          moban
+          git status
+          git diff --exit-code
+      - name: Auto-commit
+        if: failure()
+        uses: docker://cdssnc/auto-commit-github-action
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          args: >-
+            This is an auto-commit, updating project meta data,
+            such as changelog.rst, contributors.rst
--- a/.gitignore
+++ b/.gitignore
@ -540,3 +540,7 @@ cscope.files
 cscope.out
 cscope.in.out
 cscope.po.out
+
+
+# remove moban hash dictionary
+.moban.hashes
--- a/CONTRIBUTORS.rst
+++ b/CONTRIBUTORS.rst
@ -4,4 +4,4 @@

 In alphabetical order:

-* `John Vandenberg <https://api.github.com/users/jayvdb>`_
+* `John Vandenberg <https://github.com/jayvdb>`_
--- a/Makefile
+++ b/Makefile
@ -6,9 +6,6 @@ test: lint
 install_test:
 	pip install -r tests/requirements.txt

-git-diff-check:
-	git diff --exit-code
-
 lint:
 	bash lint.sh

--- a/lint.sh
+++ b/lint.sh
@ -1,3 +1,2 @@
 pip install flake8
-flake8 . --exclude=.moban.d,docs,setup.py --builtins=unicode,xrange,long
-python setup.py checkdocs
+flake8 --exclude=.moban.d,docs,setup.py   --builtins=unicode,xrange,long . && python setup.py checkdocs
--- a/pyexcel_xls/__init__.py
+++ b/pyexcel_xls/__init__.py
@ -4,7 +4,7 @@

    The lower level xls/xlsx/xlsm file format handler using xlrd/xlwt

-    :copyright: (c) 2016-2017 by Onni Software Ltd
+    :copyright: (c) 2016-2020 by Onni Software Ltd
    :license: New BSD License
 """

@ -15,15 +15,28 @@ from pyexcel_io.io import save_data as write_data

 # this line has to be place above all else
 # because of dynamic import
-from pyexcel_io.plugins import IOPluginInfoChain
+from pyexcel_io.plugins import IOPluginInfoChainV2

 __FILE_TYPE__ = "xls"
-IOPluginInfoChain(__name__).add_a_reader(
-    relative_plugin_class_path="xlsr.XLSBook",
+
+IOPluginInfoChainV2(__name__).add_a_reader(
+    relative_plugin_class_path="xlsr.XLSInFile",
+    locations=["file"],
+    file_types=[__FILE_TYPE__, "xlsx", "xlsm"],
+    stream_type="binary",
+).add_a_reader(
+    relative_plugin_class_path="xlsr.XLSInMemory",
+    locations=["memory"],
+    file_types=[__FILE_TYPE__, "xlsx", "xlsm"],
+    stream_type="binary",
+).add_a_reader(
+    relative_plugin_class_path="xlsr.XLSInContent",
+    locations=["content"],
    file_types=[__FILE_TYPE__, "xlsx", "xlsm"],
    stream_type="binary",
 ).add_a_writer(
    relative_plugin_class_path="xlsw.XLSWriter",
+    locations=["file", "memory"],
    file_types=[__FILE_TYPE__],
    stream_type="binary",
 )
--- a/pyexcel_xls/xlsr.py
+++ b/pyexcel_xls/xlsr.py
@ -4,17 +4,16 @@

    The lower level xls/xlsm file format handler using xlrd

-    :copyright: (c) 2016-2017 by Onni Software Ltd
+    :copyright: (c) 2016-2020 by Onni Software Ltd
    :license: New BSD License
 """
 import datetime

 import xlrd
-
-from pyexcel_io.book import BookReader
-from pyexcel_io.sheet import SheetReader
 from pyexcel_io.service import has_no_digits_in_float
-from pyexcel_io._compact import OrderedDict, irange
+from pyexcel_io._compact import irange
+from pyexcel_io.plugin_api.abstract_sheet import ISheet
+from pyexcel_io.plugin_api.abstract_reader import IReader

 XLS_KEYWORDS = [
    "filename",
@ -45,7 +44,7 @@ class MergedCell(object):
                registry[key] = self


-class XLSheet(SheetReader):
+class XLSheet(ISheet):
    """
    xls, xlsx, xlsm sheet reader

@ -53,12 +52,13 @@ class XLSheet(SheetReader):
    """

    def __init__(self, sheet, auto_detect_int=True, date_mode=0, **keywords):
-        SheetReader.__init__(self, sheet, **keywords)
        self.__auto_detect_int = auto_detect_int
        self.__hidden_cols = []
        self.__hidden_rows = []
        self.__merged_cells = {}
        self._book_date_mode = date_mode
+        self._native_sheet = sheet
+        self._keywords = keywords
        if keywords.get("detect_merged_cells") is True:
            for merged_cell_ranges in sheet.merged_cells:
                merged_cells = MergedCell(*merged_cell_ranges)
@ -75,17 +75,14 @@ class XLSheet(SheetReader):
    def name(self):
        return self._native_sheet.name

-    def number_of_rows(self):
-        """
-        Number of rows in the xls sheet
-        """
-        return self._native_sheet.nrows - len(self.__hidden_rows)
+    def row_iterator(self):
+        number_of_rows = self._native_sheet.nrows - len(self.__hidden_rows)
+        return range(number_of_rows)

-    def number_of_columns(self):
-        """
-        Number of columns in the xls sheet
-        """
-        return self._native_sheet.ncols - len(self.__hidden_cols)
+    def column_iterator(self, row):
+        number_of_columns = self._native_sheet.ncols - len(self.__hidden_cols)
+        for column in range(number_of_columns):
+            yield self.cell_value(row, column)

    def cell_value(self, row, column):
        """
@ -127,93 +124,48 @@ def calculate_offsets(incoming_index, hidden_indices):
    return incoming_index + offset


-class XLSBook(BookReader):
+class XLSReader(IReader):
    """
    XLSBook reader

    It reads xls, xlsm, xlsx work book
    """

-    def __init__(self):
-        BookReader.__init__(self)
-        self._file_content = None
-        self.__skip_hidden_sheets = True
-        self.__skip_hidden_row_column = True
-        self.__detect_merged_cells = False
-
-    def open(self, file_name, **keywords):
-        self.__parse_keywords(**keywords)
-        BookReader.open(self, file_name, **keywords)
-
-    def open_stream(self, file_stream, **keywords):
-        self.__parse_keywords(**keywords)
-        BookReader.open_stream(self, file_stream, **keywords)
-
-    def open_content(self, file_content, **keywords):
-        self.__parse_keywords(**keywords)
-        self._keywords = keywords
-        self._file_content = file_content
-
-    def __parse_keywords(self, **keywords):
+    def __init__(self, file_type, **keywords):
        self.__skip_hidden_sheets = keywords.get("skip_hidden_sheets", True)
        self.__skip_hidden_row_column = keywords.get(
            "skip_hidden_row_and_column", True
        )
        self.__detect_merged_cells = keywords.get("detect_merged_cells", False)
+        self._keywords = keywords
+        xlrd_params = self._extract_xlrd_params()
+        if self.__skip_hidden_row_column and file_type == "xls":
+            xlrd_params["formatting_info"] = True
+        if self.__detect_merged_cells:
+            xlrd_params["formatting_info"] = True
+
+        self.content_array = []
+        self._native_book = self.get_xls_book(**xlrd_params)
+        for sheet in self._native_book.sheets():
+            if self.__skip_hidden_sheets and sheet.visibility != 0:
+                continue
+            self.content_array.append(sheet)
+
+    def read_sheet(self, index):
+        native_sheet = self.content_array[index]
+        sheet = XLSheet(
+            native_sheet,
+            date_mode=self._native_book.datemode,
+            **self._keywords
+        )
+        return sheet

    def close(self):
        if self._native_book:
            self._native_book.release_resources()
            self._native_book = None

-    def read_sheet_by_index(self, sheet_index):
-        self._native_book = self._get_book(on_demand=True)
-        sheet = self._native_book.sheet_by_index(sheet_index)
-        return self.read_sheet(sheet)
-
-    def read_sheet_by_name(self, sheet_name):
-        self._native_book = self._get_book(on_demand=True)
-        try:
-            sheet = self._native_book.sheet_by_name(sheet_name)
-        except xlrd.XLRDError:
-            raise ValueError("%s cannot be found" % sheet_name)
-        return self.read_sheet(sheet)
-
-    def read_all(self):
-        result = OrderedDict()
-        self._native_book = self._get_book()
-        for sheet in self._native_book.sheets():
-            if self.__skip_hidden_sheets and sheet.visibility != 0:
-                continue
-            data_dict = self.read_sheet(sheet)
-            result.update(data_dict)
-        return result
-
-    def read_sheet(self, native_sheet):
-        sheet = XLSheet(
-            native_sheet,
-            date_mode=self._native_book.datemode,
-            **self._keywords
-        )
-        return {sheet.name: sheet.to_array()}
-
-    def _get_book(self, on_demand=False):
-        xlrd_params = self._extract_xlrd_params()
-        xlrd_params["on_demand"] = on_demand
-
-        if self._file_name:
-            xlrd_params["filename"] = self._file_name
-        elif self._file_stream:
-            file_content = self._file_stream.read()
-            xlrd_params["file_contents"] = file_content
-        elif self._file_content is not None:
-            xlrd_params["file_contents"] = self._file_content
-        else:
-            raise IOError("No valid file name or file content found.")
-        if self.__skip_hidden_row_column and self._file_type == "xls":
-            xlrd_params["formatting_info"] = True
-        if self.__detect_merged_cells:
-            xlrd_params["formatting_info"] = True
+    def get_xls_book(self, **xlrd_params):
        xls_book = xlrd.open_workbook(**xlrd_params)
        return xls_book

@ -226,6 +178,24 @@ class XLSBook(BookReader):
        return params


+class XLSInFile(XLSReader):
+    def __init__(self, file_name, file_type, **keywords):
+        super().__init__(file_type, filename=file_name, **keywords)
+
+
+class XLSInContent(XLSReader):
+    def __init__(self, file_content, file_type, **keywords):
+        super().__init__(file_type, file_contents=file_content, **keywords)
+
+
+class XLSInMemory(XLSReader):
+    def __init__(self, file_stream, file_type, **keywords):
+        file_stream.seek(0)
+        super().__init__(
+            file_type, file_contents=file_stream.read(), **keywords
+        )
+
+
 def xldate_to_python_date(value, date_mode):
    """
    convert xl date to python date
--- a/pyexcel_xls/xlsw.py
+++ b/pyexcel_xls/xlsw.py
@ -4,16 +4,15 @@

    The lower level xls file format handler using xlwt

-    :copyright: (c) 2016-2017 by Onni Software Ltd
+    :copyright: (c) 2016-2020 by Onni Software Ltd
    :license: New BSD License
 """
 import datetime

 import xlrd
 from xlwt import XFStyle, Workbook
-
-from pyexcel_io.book import BookWriter
 from pyexcel_io.sheet import SheetWriter
+from pyexcel_io.plugin_api.abstract_writer import IWriter

 DEFAULT_DATE_FORMAT = "DD/MM/YY"
 DEFAULT_TIME_FORMAT = "HH:MM:SS"
@ -27,8 +26,7 @@ class XLSheetWriter(SheetWriter):
    """

    def set_sheet_name(self, name):
-        """Create a sheet
-        """
+        """Create a sheet"""
        self._native_sheet = self._native_book.add_sheet(name)
        self.current_row = 0

@ -68,32 +66,33 @@ class XLSheetWriter(SheetWriter):
        self.current_row += 1


-class XLSWriter(BookWriter):
+class XLSWriter(IWriter):
    """
    xls writer
    """

-    def __init__(self):
-        BookWriter.__init__(self)
-        self.work_book = None
-
-    def open(
-        self, file_name, encoding="ascii", style_compression=2, **keywords
+    def __init__(
+        self,
+        file_alike_object,
+        _,  # file_type not used
+        encoding="ascii",
+        style_compression=2,
+        **keywords
    ):
-        BookWriter.open(self, file_name, **keywords)
+        self._file_alike_object = file_alike_object
        self.work_book = Workbook(
            style_compression=style_compression, encoding=encoding
        )

-    def write(self, incoming_dict):
-        if incoming_dict:
-            BookWriter.write(self, incoming_dict)
-        else:
-            raise NotImplementedError(EMPTY_SHEET_NOT_ALLOWED)
-
    def create_sheet(self, name):
        return XLSheetWriter(self.work_book, None, name)

+    def write(self, incoming_dict):
+        if incoming_dict:
+            IWriter.write(self, incoming_dict)
+        else:
+            raise NotImplementedError(EMPTY_SHEET_NOT_ALLOWED)
+
    def close(self):
        """
        This call actually save the file
--- a/rnd_requirements.txt
+++ b/rnd_requirements.txt
@ -1,2 +1,2 @@
-https://github.com/pyexcel/pyexcel-io/archive/master.zip
+https://github.com/pyexcel/pyexcel-io/archive/dev.zip

--- a/tests/test_bug_fixes.py
+++ b/tests/test_bug_fixes.py
@ -6,6 +6,7 @@

 import os
 import datetime
+from unittest.mock import MagicMock, patch

 import pyexcel as pe
 from _compact import OrderedDict
@ -13,7 +14,6 @@ from pyexcel_xls import save_data
 from pyexcel_xls.xlsr import xldate_to_python_date
 from pyexcel_xls.xlsw import XLSWriter as Writer

-from mock import patch
 from nose import SkipTest
 from nose.tools import eq_, raises

@ -79,13 +79,11 @@ def test_issue_16_file_stream_has_no_getvalue():

@patch("xlrd.open_workbook")
 def test_issue_18_encoding_override_isnt_passed(fake_open):
-    fake_open.return_value = None
+    fake_open.return_value = MagicMock(sheets=MagicMock(return_value=[]))
    test_encoding = "utf-32"
-    from pyexcel_xls.xlsr import XLSBook
+    from pyexcel_xls.xlsr import XLSInFile

-    book = XLSBook()
-    book.open("fake_file.xls", encoding_override=test_encoding)
-    book._get_book()
+    XLSInFile("fake_file.xls", "xls", encoding_override=test_encoding)
    keywords = fake_open.call_args[1]
    assert keywords["encoding_override"] == test_encoding

@ -112,7 +110,7 @@ def test_empty_book_pyexcel_issue_120():
    """
    https://github.com/pyexcel/pyexcel/issues/120
    """
-    writer = Writer()
+    writer = Writer("fake.xls", "xls")
    writer.write({})


--- a/tests/test_filter.py
+++ b/tests/test_filter.py
@ -1,8 +1,9 @@
 import os

-from nose.tools import eq_
 from pyexcel_io import get_data, save_data

+from nose.tools import eq_
+

 class TestFilter:
    def setUp(self):
--- a/tests/test_writer.py
+++ b/tests/test_writer.py
@ -1,7 +1,7 @@
 import os

 from base import PyexcelWriterBase, PyexcelHatWriterBase
-from pyexcel_xls.xlsr import XLSBook as Reader
+from pyexcel_xls.xlsr import XLSReader as Reader
 from pyexcel_xls.xlsw import XLSWriter as Writer


@ -13,12 +13,10 @@ class TestNativeXLSWriter:
            "Sheet3": [[u"X", u"Y", u"Z"], [1, 4, 7], [2, 5, 8], [3, 6, 9]],
        }
        self.testfile = "writer.xls"
-        writer = Writer()
-        writer.open(self.testfile)
+        writer = Writer(self.testfile, "xls")
        writer.write(self.content)
        writer.close()
-        reader = Reader()
-        reader.open(self.testfile)
+        reader = Reader("xls", filename=self.testfile)
        content = reader.read_all()
        for key in content.keys():
            content[key] = list(content[key])