From d3b57b434b7a4920dcf02e2be5d054259e42736b Mon Sep 17 00:00:00 2001
From: jaska <wangc_2011@hotmail.com>
Date: Tue, 29 Sep 2020 22:26:05 +0100
Subject: [PATCH] New style io (#36)

* :tada: new style reader

* :tada: new style writer and reader plugins

* This is an auto-commit, updating project meta data, such as changelog.rst, contributors.rst

* :green_heart: update moban update

* This is an auto-commit, updating project meta data, such as changelog.rst, contributors.rst

* :green_heart: use pyexcel-io dev branch

* :books: update PR template

* :horse: code refactoring

Co-authored-by: chfw <chfw@users.noreply.github.com>
---
 .github/PULL_REQUEST_TEMPLATE.md   |   6 +-
 .github/workflows/moban-update.yml |  42 +++++----
 .gitignore                         |   4 +
 CONTRIBUTORS.rst                   |   2 +-
 Makefile                           |   3 -
 lint.sh                            |   3 +-
 pyexcel_xls/__init__.py            |  21 ++++-
 pyexcel_xls/xlsr.py                | 144 ++++++++++++-----------------
 pyexcel_xls/xlsw.py                |  37 ++++----
 rnd_requirements.txt               |   2 +-
 tests/test_bug_fixes.py            |  12 +--
 tests/test_filter.py               |   3 +-
 tests/test_writer.py               |   8 +-
 13 files changed, 133 insertions(+), 154 deletions(-)

diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 7d1b0c8..6017f21 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,11 +1,9 @@
 With your PR, here is a check list:
 
-- [ ] Has Test cases written
-- [ ] Has all code lines tested
+- [ ] Have test cases been written?
+- [ ] Are all code lines tested?
 - [ ] Has `make format` been run?
 - [ ] Please update CHANGELOG.yml(not CHANGELOG.rst)
 - [ ] Passes all Travis CI builds
 - [ ] Has fair amount of documentation if your change is complex
-- [ ] run 'make format' so as to confirm the pyexcel organisation's coding style
-- [ ] Please add yourself to 'contributors' section of pyexcel-xls.yml (if not found, please use CONTRIBUTORS.rst)
 - [ ] Agree on NEW BSD License for your contribution
diff --git a/.github/workflows/moban-update.yml b/.github/workflows/moban-update.yml
index db4f13a..706fd82 100644
--- a/.github/workflows/moban-update.yml
+++ b/.github/workflows/moban-update.yml
@@ -5,23 +5,25 @@ jobs:
     runs-on: ubuntu-latest
     name: synchronize templates via moban
     steps:
-    - uses: actions/checkout@v2
-      with:
-        ref: ${{ github.head_ref }}
-    - name: Set up Python
-      uses: actions/setup-python@v1
-      with:
-        python-version: '3.7'
-    - name: check changes
-      run: |
-        pip install moban gitfs2 pypifs
-        moban
-        git status
-        git diff --exit-code
-    - name: Auto-commit
-      if: failure()
-      uses: docker://cdssnc/auto-commit-github-action
-      env:
-        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      with:
-        args: This is an auto-commit, updating project meta data, such as changelog.rst, contributors.rst
+      - uses: actions/checkout@v2
+        with:
+          ref: ${{ github.head_ref }}
+      - name: Set up Python
+        uses: actions/setup-python@v1
+        with:
+          python-version: '3.7'
+      - name: check changes
+        run: |
+          pip install moban gitfs2 pypifs moban-jinja2-github moban-ansible
+          moban
+          git status
+          git diff --exit-code
+      - name: Auto-commit
+        if: failure()
+        uses: docker://cdssnc/auto-commit-github-action
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          args: >-
+            This is an auto-commit, updating project meta data,
+            such as changelog.rst, contributors.rst
diff --git a/.gitignore b/.gitignore
index a9ca840..e8b12f9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -540,3 +540,7 @@ cscope.files
 cscope.out
 cscope.in.out
 cscope.po.out
+
+
+# exclude moban hash dictionary from version control
+.moban.hashes
diff --git a/CONTRIBUTORS.rst b/CONTRIBUTORS.rst
index c8e37ad..bfd5904 100644
--- a/CONTRIBUTORS.rst
+++ b/CONTRIBUTORS.rst
@@ -4,4 +4,4 @@
 
 In alphabetical order:
 
-* `John Vandenberg <https://api.github.com/users/jayvdb>`_
+* `John Vandenberg <https://github.com/jayvdb>`_
diff --git a/Makefile b/Makefile
index d7e640e..3e0ee51 100644
--- a/Makefile
+++ b/Makefile
@@ -6,9 +6,6 @@ test: lint
 install_test:
 	pip install -r tests/requirements.txt
 
-git-diff-check:
-	git diff --exit-code
-
 lint:
 	bash lint.sh
 
diff --git a/lint.sh b/lint.sh
index 6907d07..891aa63 100644
--- a/lint.sh
+++ b/lint.sh
@@ -1,3 +1,2 @@
 pip install flake8
-flake8 . --exclude=.moban.d,docs,setup.py --builtins=unicode,xrange,long
-python setup.py checkdocs
+flake8 --exclude=.moban.d,docs,setup.py   --builtins=unicode,xrange,long . && python setup.py checkdocs
diff --git a/pyexcel_xls/__init__.py b/pyexcel_xls/__init__.py
index f512cfc..a688b5b 100644
--- a/pyexcel_xls/__init__.py
+++ b/pyexcel_xls/__init__.py
@@ -4,7 +4,7 @@
 
     The lower level xls/xlsx/xlsm file format handler using xlrd/xlwt
 
-    :copyright: (c) 2016-2017 by Onni Software Ltd
+    :copyright: (c) 2016-2020 by Onni Software Ltd
     :license: New BSD License
 """
 
@@ -15,15 +15,28 @@ from pyexcel_io.io import save_data as write_data
 
 # this line has to be place above all else
 # because of dynamic import
-from pyexcel_io.plugins import IOPluginInfoChain
+from pyexcel_io.plugins import IOPluginInfoChainV2
 
 __FILE_TYPE__ = "xls"
-IOPluginInfoChain(__name__).add_a_reader(
-    relative_plugin_class_path="xlsr.XLSBook",
+
+IOPluginInfoChainV2(__name__).add_a_reader(
+    relative_plugin_class_path="xlsr.XLSInFile",
+    locations=["file"],
+    file_types=[__FILE_TYPE__, "xlsx", "xlsm"],
+    stream_type="binary",
+).add_a_reader(
+    relative_plugin_class_path="xlsr.XLSInMemory",
+    locations=["memory"],
+    file_types=[__FILE_TYPE__, "xlsx", "xlsm"],
+    stream_type="binary",
+).add_a_reader(
+    relative_plugin_class_path="xlsr.XLSInContent",
+    locations=["content"],
     file_types=[__FILE_TYPE__, "xlsx", "xlsm"],
     stream_type="binary",
 ).add_a_writer(
     relative_plugin_class_path="xlsw.XLSWriter",
+    locations=["file", "memory"],
     file_types=[__FILE_TYPE__],
     stream_type="binary",
 )
diff --git a/pyexcel_xls/xlsr.py b/pyexcel_xls/xlsr.py
index a6b9b97..7a635e9 100644
--- a/pyexcel_xls/xlsr.py
+++ b/pyexcel_xls/xlsr.py
@@ -4,17 +4,16 @@
 
     The lower level xls/xlsm file format handler using xlrd
 
-    :copyright: (c) 2016-2017 by Onni Software Ltd
+    :copyright: (c) 2016-2020 by Onni Software Ltd
     :license: New BSD License
 """
 import datetime
 
 import xlrd
-
-from pyexcel_io.book import BookReader
-from pyexcel_io.sheet import SheetReader
 from pyexcel_io.service import has_no_digits_in_float
-from pyexcel_io._compact import OrderedDict, irange
+from pyexcel_io._compact import irange
+from pyexcel_io.plugin_api.abstract_sheet import ISheet
+from pyexcel_io.plugin_api.abstract_reader import IReader
 
 XLS_KEYWORDS = [
     "filename",
@@ -45,7 +44,7 @@ class MergedCell(object):
                 registry[key] = self
 
 
-class XLSheet(SheetReader):
+class XLSheet(ISheet):
     """
     xls, xlsx, xlsm sheet reader
 
@@ -53,12 +52,13 @@ class XLSheet(SheetReader):
     """
 
     def __init__(self, sheet, auto_detect_int=True, date_mode=0, **keywords):
-        SheetReader.__init__(self, sheet, **keywords)
         self.__auto_detect_int = auto_detect_int
         self.__hidden_cols = []
         self.__hidden_rows = []
         self.__merged_cells = {}
         self._book_date_mode = date_mode
+        self._native_sheet = sheet
+        self._keywords = keywords
         if keywords.get("detect_merged_cells") is True:
             for merged_cell_ranges in sheet.merged_cells:
                 merged_cells = MergedCell(*merged_cell_ranges)
@@ -75,17 +75,14 @@ class XLSheet(SheetReader):
     def name(self):
         return self._native_sheet.name
 
-    def number_of_rows(self):
-        """
-        Number of rows in the xls sheet
-        """
-        return self._native_sheet.nrows - len(self.__hidden_rows)
+    def row_iterator(self):
+        number_of_rows = self._native_sheet.nrows - len(self.__hidden_rows)
+        return range(number_of_rows)
 
-    def number_of_columns(self):
-        """
-        Number of columns in the xls sheet
-        """
-        return self._native_sheet.ncols - len(self.__hidden_cols)
+    def column_iterator(self, row):
+        number_of_columns = self._native_sheet.ncols - len(self.__hidden_cols)
+        for column in range(number_of_columns):
+            yield self.cell_value(row, column)
 
     def cell_value(self, row, column):
         """
@@ -127,93 +124,48 @@ def calculate_offsets(incoming_index, hidden_indices):
     return incoming_index + offset
 
 
-class XLSBook(BookReader):
+class XLSReader(IReader):
     """
     XLSBook reader
 
     It reads xls, xlsm, xlsx work book
     """
 
-    def __init__(self):
-        BookReader.__init__(self)
-        self._file_content = None
-        self.__skip_hidden_sheets = True
-        self.__skip_hidden_row_column = True
-        self.__detect_merged_cells = False
-
-    def open(self, file_name, **keywords):
-        self.__parse_keywords(**keywords)
-        BookReader.open(self, file_name, **keywords)
-
-    def open_stream(self, file_stream, **keywords):
-        self.__parse_keywords(**keywords)
-        BookReader.open_stream(self, file_stream, **keywords)
-
-    def open_content(self, file_content, **keywords):
-        self.__parse_keywords(**keywords)
-        self._keywords = keywords
-        self._file_content = file_content
-
-    def __parse_keywords(self, **keywords):
+    def __init__(self, file_type, **keywords):
         self.__skip_hidden_sheets = keywords.get("skip_hidden_sheets", True)
         self.__skip_hidden_row_column = keywords.get(
             "skip_hidden_row_and_column", True
         )
         self.__detect_merged_cells = keywords.get("detect_merged_cells", False)
+        self._keywords = keywords
+        xlrd_params = self._extract_xlrd_params()
+        if self.__skip_hidden_row_column and file_type == "xls":
+            xlrd_params["formatting_info"] = True
+        if self.__detect_merged_cells:
+            xlrd_params["formatting_info"] = True
+
+        self.content_array = []
+        self._native_book = self.get_xls_book(**xlrd_params)
+        for sheet in self._native_book.sheets():
+            if self.__skip_hidden_sheets and sheet.visibility != 0:
+                continue
+            self.content_array.append(sheet)
+
+    def read_sheet(self, index):
+        native_sheet = self.content_array[index]
+        sheet = XLSheet(
+            native_sheet,
+            date_mode=self._native_book.datemode,
+            **self._keywords
+        )
+        return sheet
 
     def close(self):
         if self._native_book:
             self._native_book.release_resources()
             self._native_book = None
 
-    def read_sheet_by_index(self, sheet_index):
-        self._native_book = self._get_book(on_demand=True)
-        sheet = self._native_book.sheet_by_index(sheet_index)
-        return self.read_sheet(sheet)
-
-    def read_sheet_by_name(self, sheet_name):
-        self._native_book = self._get_book(on_demand=True)
-        try:
-            sheet = self._native_book.sheet_by_name(sheet_name)
-        except xlrd.XLRDError:
-            raise ValueError("%s cannot be found" % sheet_name)
-        return self.read_sheet(sheet)
-
-    def read_all(self):
-        result = OrderedDict()
-        self._native_book = self._get_book()
-        for sheet in self._native_book.sheets():
-            if self.__skip_hidden_sheets and sheet.visibility != 0:
-                continue
-            data_dict = self.read_sheet(sheet)
-            result.update(data_dict)
-        return result
-
-    def read_sheet(self, native_sheet):
-        sheet = XLSheet(
-            native_sheet,
-            date_mode=self._native_book.datemode,
-            **self._keywords
-        )
-        return {sheet.name: sheet.to_array()}
-
-    def _get_book(self, on_demand=False):
-        xlrd_params = self._extract_xlrd_params()
-        xlrd_params["on_demand"] = on_demand
-
-        if self._file_name:
-            xlrd_params["filename"] = self._file_name
-        elif self._file_stream:
-            file_content = self._file_stream.read()
-            xlrd_params["file_contents"] = file_content
-        elif self._file_content is not None:
-            xlrd_params["file_contents"] = self._file_content
-        else:
-            raise IOError("No valid file name or file content found.")
-        if self.__skip_hidden_row_column and self._file_type == "xls":
-            xlrd_params["formatting_info"] = True
-        if self.__detect_merged_cells:
-            xlrd_params["formatting_info"] = True
+    def get_xls_book(self, **xlrd_params):
         xls_book = xlrd.open_workbook(**xlrd_params)
         return xls_book
 
@@ -226,6 +178,24 @@ class XLSBook(BookReader):
         return params
 
 
+class XLSInFile(XLSReader):
+    def __init__(self, file_name, file_type, **keywords):
+        super().__init__(file_type, filename=file_name, **keywords)
+
+
+class XLSInContent(XLSReader):
+    def __init__(self, file_content, file_type, **keywords):
+        super().__init__(file_type, file_contents=file_content, **keywords)
+
+
+class XLSInMemory(XLSReader):
+    def __init__(self, file_stream, file_type, **keywords):
+        file_stream.seek(0)
+        super().__init__(
+            file_type, file_contents=file_stream.read(), **keywords
+        )
+
+
 def xldate_to_python_date(value, date_mode):
     """
     convert xl date to python date
diff --git a/pyexcel_xls/xlsw.py b/pyexcel_xls/xlsw.py
index 5dd1520..7d08c1b 100644
--- a/pyexcel_xls/xlsw.py
+++ b/pyexcel_xls/xlsw.py
@@ -4,16 +4,15 @@
 
     The lower level xls file format handler using xlwt
 
-    :copyright: (c) 2016-2017 by Onni Software Ltd
+    :copyright: (c) 2016-2020 by Onni Software Ltd
     :license: New BSD License
 """
 import datetime
 
 import xlrd
 from xlwt import XFStyle, Workbook
-
-from pyexcel_io.book import BookWriter
 from pyexcel_io.sheet import SheetWriter
+from pyexcel_io.plugin_api.abstract_writer import IWriter
 
 DEFAULT_DATE_FORMAT = "DD/MM/YY"
 DEFAULT_TIME_FORMAT = "HH:MM:SS"
@@ -27,8 +26,7 @@ class XLSheetWriter(SheetWriter):
     """
 
     def set_sheet_name(self, name):
-        """Create a sheet
-        """
+        """Create a sheet"""
         self._native_sheet = self._native_book.add_sheet(name)
         self.current_row = 0
 
@@ -68,32 +66,33 @@ class XLSheetWriter(SheetWriter):
         self.current_row += 1
 
 
-class XLSWriter(BookWriter):
+class XLSWriter(IWriter):
     """
     xls writer
     """
 
-    def __init__(self):
-        BookWriter.__init__(self)
-        self.work_book = None
-
-    def open(
-        self, file_name, encoding="ascii", style_compression=2, **keywords
+    def __init__(
+        self,
+        file_alike_object,
+        _,  # file_type not used
+        encoding="ascii",
+        style_compression=2,
+        **keywords
     ):
-        BookWriter.open(self, file_name, **keywords)
+        self._file_alike_object = file_alike_object
         self.work_book = Workbook(
             style_compression=style_compression, encoding=encoding
         )
 
-    def write(self, incoming_dict):
-        if incoming_dict:
-            BookWriter.write(self, incoming_dict)
-        else:
-            raise NotImplementedError(EMPTY_SHEET_NOT_ALLOWED)
-
     def create_sheet(self, name):
         return XLSheetWriter(self.work_book, None, name)
 
+    def write(self, incoming_dict):
+        if incoming_dict:
+            IWriter.write(self, incoming_dict)
+        else:
+            raise NotImplementedError(EMPTY_SHEET_NOT_ALLOWED)
+
     def close(self):
         """
         This call actually save the file
diff --git a/rnd_requirements.txt b/rnd_requirements.txt
index 6d9386d..dffaa2d 100644
--- a/rnd_requirements.txt
+++ b/rnd_requirements.txt
@@ -1,2 +1,2 @@
-https://github.com/pyexcel/pyexcel-io/archive/master.zip
+https://github.com/pyexcel/pyexcel-io/archive/dev.zip
 
diff --git a/tests/test_bug_fixes.py b/tests/test_bug_fixes.py
index d298374..f38abfc 100644
--- a/tests/test_bug_fixes.py
+++ b/tests/test_bug_fixes.py
@@ -6,6 +6,7 @@
 
 import os
 import datetime
+from unittest.mock import MagicMock, patch
 
 import pyexcel as pe
 from _compact import OrderedDict
@@ -13,7 +14,6 @@ from pyexcel_xls import save_data
 from pyexcel_xls.xlsr import xldate_to_python_date
 from pyexcel_xls.xlsw import XLSWriter as Writer
 
-from mock import patch
 from nose import SkipTest
 from nose.tools import eq_, raises
 
@@ -79,13 +79,11 @@ def test_issue_16_file_stream_has_no_getvalue():
 
 @patch("xlrd.open_workbook")
 def test_issue_18_encoding_override_isnt_passed(fake_open):
-    fake_open.return_value = None
+    fake_open.return_value = MagicMock(sheets=MagicMock(return_value=[]))
     test_encoding = "utf-32"
-    from pyexcel_xls.xlsr import XLSBook
+    from pyexcel_xls.xlsr import XLSInFile
 
-    book = XLSBook()
-    book.open("fake_file.xls", encoding_override=test_encoding)
-    book._get_book()
+    XLSInFile("fake_file.xls", "xls", encoding_override=test_encoding)
     keywords = fake_open.call_args[1]
     assert keywords["encoding_override"] == test_encoding
 
@@ -112,7 +110,7 @@ def test_empty_book_pyexcel_issue_120():
     """
     https://github.com/pyexcel/pyexcel/issues/120
     """
-    writer = Writer()
+    writer = Writer("fake.xls", "xls")
     writer.write({})
 
 
diff --git a/tests/test_filter.py b/tests/test_filter.py
index c9bc9e0..cd07d3c 100644
--- a/tests/test_filter.py
+++ b/tests/test_filter.py
@@ -1,8 +1,9 @@
 import os
 
-from nose.tools import eq_
 from pyexcel_io import get_data, save_data
 
+from nose.tools import eq_
+
 
 class TestFilter:
     def setUp(self):
diff --git a/tests/test_writer.py b/tests/test_writer.py
index e36454a..aef9a59 100644
--- a/tests/test_writer.py
+++ b/tests/test_writer.py
@@ -1,7 +1,7 @@
 import os
 
 from base import PyexcelWriterBase, PyexcelHatWriterBase
-from pyexcel_xls.xlsr import XLSBook as Reader
+from pyexcel_xls.xlsr import XLSReader as Reader
 from pyexcel_xls.xlsw import XLSWriter as Writer
 
 
@@ -13,12 +13,10 @@ class TestNativeXLSWriter:
             "Sheet3": [[u"X", u"Y", u"Z"], [1, 4, 7], [2, 5, 8], [3, 6, 9]],
         }
         self.testfile = "writer.xls"
-        writer = Writer()
-        writer.open(self.testfile)
+        writer = Writer(self.testfile, "xls")
         writer.write(self.content)
         writer.close()
-        reader = Reader()
-        reader.open(self.testfile)
+        reader = Reader("xls", filename=self.testfile)
         content = reader.read_all()
         for key in content.keys():
             content[key] = list(content[key])