From d3b57b434b7a4920dcf02e2be5d054259e42736b Mon Sep 17 00:00:00 2001 From: jaska Date: Tue, 29 Sep 2020 22:26:05 +0100 Subject: [PATCH] New style io (#36) * :tada: new style reader * :tada: new style writer and reader plugins * This is an auto-commit, updating project meta data, such as changelog.rst, contributors.rst * :green_heart: update moban update * This is an auto-commit, updating project meta data, such as changelog.rst, contributors.rst * :green_heart: use pyexcel-io dev branch * :books: update PR template * :horse: code refactoring Co-authored-by: chfw --- .github/PULL_REQUEST_TEMPLATE.md | 6 +- .github/workflows/moban-update.yml | 42 +++++---- .gitignore | 4 + CONTRIBUTORS.rst | 2 +- Makefile | 3 - lint.sh | 3 +- pyexcel_xls/__init__.py | 21 ++++- pyexcel_xls/xlsr.py | 144 ++++++++++++----------------- pyexcel_xls/xlsw.py | 37 ++++---- rnd_requirements.txt | 2 +- tests/test_bug_fixes.py | 12 +-- tests/test_filter.py | 3 +- tests/test_writer.py | 8 +- 13 files changed, 133 insertions(+), 154 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 7d1b0c8..6017f21 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,11 +1,9 @@ With your PR, here is a check list: -- [ ] Has Test cases written -- [ ] Has all code lines tested +- [ ] Has test cases written? +- [ ] Has all code lines tested? - [ ] Has `make format` been run? - [ ] Please update CHANGELOG.yml(not CHANGELOG.rst) - [ ] Passes all Travis CI builds - [ ] Has fair amount of documentation if your change is complex -- [ ] run 'make format' so as to confirm the pyexcel organisation's coding style -- [ ] Please add yourself to 'contributors' section of pyexcel-xls.yml (if not found, please use CONTRIBUTORS.rst) - [ ] Agree on NEW BSD License for your contribution diff --git a/.github/workflows/moban-update.yml b/.github/workflows/moban-update.yml index db4f13a..706fd82 100644 --- a/.github/workflows/moban-update.yml +++ b/.github/workflows/moban-update.yml @@ -5,23 +5,25 @@ jobs: runs-on: ubuntu-latest name: synchronize templates via moban steps: - - uses: actions/checkout@v2 - with: - ref: ${{ github.head_ref }} - - name: Set up Python - uses: actions/setup-python@v1 - with: - python-version: '3.7' - - name: check changes - run: | - pip install moban gitfs2 pypifs - moban - git status - git diff --exit-code - - name: Auto-commit - if: failure() - uses: docker://cdssnc/auto-commit-github-action - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - args: This is an auto-commit, updating project meta data, such as changelog.rst, contributors.rst + - uses: actions/checkout@v2 + with: + ref: ${{ github.head_ref }} + - name: Set up Python + uses: actions/setup-python@v1 + with: + python-version: '3.7' + - name: check changes + run: | + pip install moban gitfs2 pypifs moban-jinja2-github moban-ansible + moban + git status + git diff --exit-code + - name: Auto-commit + if: failure() + uses: docker://cdssnc/auto-commit-github-action + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + args: >- + This is an auto-commit, updating project meta data, + such as changelog.rst, contributors.rst diff --git a/.gitignore b/.gitignore index a9ca840..e8b12f9 100644 --- a/.gitignore +++ b/.gitignore @@ -540,3 +540,7 @@ cscope.files cscope.out cscope.in.out cscope.po.out + + +# remove moban hash dictionary +.moban.hashes diff --git a/CONTRIBUTORS.rst b/CONTRIBUTORS.rst index c8e37ad..bfd5904 100644 --- a/CONTRIBUTORS.rst +++ b/CONTRIBUTORS.rst @@ -4,4 +4,4 @@ In alphabetical order: -* `John Vandenberg `_ +* `John Vandenberg `_ diff --git a/Makefile b/Makefile index d7e640e..3e0ee51 100644 --- a/Makefile +++ b/Makefile @@ -6,9 +6,6 @@ test: lint install_test: pip install -r tests/requirements.txt -git-diff-check: - git diff --exit-code - lint: bash lint.sh diff --git a/lint.sh b/lint.sh index 6907d07..891aa63 100644 --- a/lint.sh +++ b/lint.sh @@ -1,3 +1,2 @@ pip install flake8 -flake8 . --exclude=.moban.d,docs,setup.py --builtins=unicode,xrange,long -python setup.py checkdocs +flake8 --exclude=.moban.d,docs,setup.py --builtins=unicode,xrange,long . && python setup.py checkdocs diff --git a/pyexcel_xls/__init__.py b/pyexcel_xls/__init__.py index f512cfc..a688b5b 100644 --- a/pyexcel_xls/__init__.py +++ b/pyexcel_xls/__init__.py @@ -4,7 +4,7 @@ The lower level xls/xlsx/xlsm file format handler using xlrd/xlwt - :copyright: (c) 2016-2017 by Onni Software Ltd + :copyright: (c) 2016-2020 by Onni Software Ltd :license: New BSD License """ @@ -15,15 +15,28 @@ from pyexcel_io.io import save_data as write_data # this line has to be place above all else # because of dynamic import -from pyexcel_io.plugins import IOPluginInfoChain +from pyexcel_io.plugins import IOPluginInfoChainV2 __FILE_TYPE__ = "xls" -IOPluginInfoChain(__name__).add_a_reader( - relative_plugin_class_path="xlsr.XLSBook", + +IOPluginInfoChainV2(__name__).add_a_reader( + relative_plugin_class_path="xlsr.XLSInFile", + locations=["file"], + file_types=[__FILE_TYPE__, "xlsx", "xlsm"], + stream_type="binary", +).add_a_reader( + relative_plugin_class_path="xlsr.XLSInMemory", + locations=["memory"], + file_types=[__FILE_TYPE__, "xlsx", "xlsm"], + stream_type="binary", +).add_a_reader( + relative_plugin_class_path="xlsr.XLSInContent", + locations=["content"], file_types=[__FILE_TYPE__, "xlsx", "xlsm"], stream_type="binary", ).add_a_writer( relative_plugin_class_path="xlsw.XLSWriter", + locations=["file", "memory"], file_types=[__FILE_TYPE__], stream_type="binary", ) diff --git a/pyexcel_xls/xlsr.py b/pyexcel_xls/xlsr.py index a6b9b97..7a635e9 100644 --- a/pyexcel_xls/xlsr.py +++ b/pyexcel_xls/xlsr.py @@ -4,17 +4,16 @@ The lower level xls/xlsm file format handler using xlrd - :copyright: (c) 2016-2017 by Onni Software Ltd + :copyright: (c) 2016-2020 by Onni Software Ltd :license: New BSD License """ import datetime import xlrd - -from pyexcel_io.book import BookReader -from pyexcel_io.sheet import SheetReader from pyexcel_io.service import has_no_digits_in_float -from pyexcel_io._compact import OrderedDict, irange +from pyexcel_io._compact import irange +from pyexcel_io.plugin_api.abstract_sheet import ISheet +from pyexcel_io.plugin_api.abstract_reader import IReader XLS_KEYWORDS = [ "filename", @@ -45,7 +44,7 @@ class MergedCell(object): registry[key] = self -class XLSheet(SheetReader): +class XLSheet(ISheet): """ xls, xlsx, xlsm sheet reader @@ -53,12 +52,13 @@ class XLSheet(SheetReader): """ def __init__(self, sheet, auto_detect_int=True, date_mode=0, **keywords): - SheetReader.__init__(self, sheet, **keywords) self.__auto_detect_int = auto_detect_int self.__hidden_cols = [] self.__hidden_rows = [] self.__merged_cells = {} self._book_date_mode = date_mode + self._native_sheet = sheet + self._keywords = keywords if keywords.get("detect_merged_cells") is True: for merged_cell_ranges in sheet.merged_cells: merged_cells = MergedCell(*merged_cell_ranges) @@ -75,17 +75,14 @@ class XLSheet(SheetReader): def name(self): return self._native_sheet.name - def number_of_rows(self): - """ - Number of rows in the xls sheet - """ - return self._native_sheet.nrows - len(self.__hidden_rows) + def row_iterator(self): + number_of_rows = self._native_sheet.nrows - len(self.__hidden_rows) + return range(number_of_rows) - def number_of_columns(self): - """ - Number of columns in the xls sheet - """ - return self._native_sheet.ncols - len(self.__hidden_cols) + def column_iterator(self, row): + number_of_columns = self._native_sheet.ncols - len(self.__hidden_cols) + for column in range(number_of_columns): + yield self.cell_value(row, column) def cell_value(self, row, column): """ @@ -127,93 +124,48 @@ def calculate_offsets(incoming_index, hidden_indices): return incoming_index + offset -class XLSBook(BookReader): +class XLSReader(IReader): """ XLSBook reader It reads xls, xlsm, xlsx work book """ - def __init__(self): - BookReader.__init__(self) - self._file_content = None - self.__skip_hidden_sheets = True - self.__skip_hidden_row_column = True - self.__detect_merged_cells = False - - def open(self, file_name, **keywords): - self.__parse_keywords(**keywords) - BookReader.open(self, file_name, **keywords) - - def open_stream(self, file_stream, **keywords): - self.__parse_keywords(**keywords) - BookReader.open_stream(self, file_stream, **keywords) - - def open_content(self, file_content, **keywords): - self.__parse_keywords(**keywords) - self._keywords = keywords - self._file_content = file_content - - def __parse_keywords(self, **keywords): + def __init__(self, file_type, **keywords): self.__skip_hidden_sheets = keywords.get("skip_hidden_sheets", True) self.__skip_hidden_row_column = keywords.get( "skip_hidden_row_and_column", True ) self.__detect_merged_cells = keywords.get("detect_merged_cells", False) + self._keywords = keywords + xlrd_params = self._extract_xlrd_params() + if self.__skip_hidden_row_column and file_type == "xls": + xlrd_params["formatting_info"] = True + if self.__detect_merged_cells: + xlrd_params["formatting_info"] = True + + self.content_array = [] + self._native_book = self.get_xls_book(**xlrd_params) + for sheet in self._native_book.sheets(): + if self.__skip_hidden_sheets and sheet.visibility != 0: + continue + self.content_array.append(sheet) + + def read_sheet(self, index): + native_sheet = self.content_array[index] + sheet = XLSheet( + native_sheet, + date_mode=self._native_book.datemode, + **self._keywords + ) + return sheet def close(self): if self._native_book: self._native_book.release_resources() self._native_book = None - def read_sheet_by_index(self, sheet_index): - self._native_book = self._get_book(on_demand=True) - sheet = self._native_book.sheet_by_index(sheet_index) - return self.read_sheet(sheet) - - def read_sheet_by_name(self, sheet_name): - self._native_book = self._get_book(on_demand=True) - try: - sheet = self._native_book.sheet_by_name(sheet_name) - except xlrd.XLRDError: - raise ValueError("%s cannot be found" % sheet_name) - return self.read_sheet(sheet) - - def read_all(self): - result = OrderedDict() - self._native_book = self._get_book() - for sheet in self._native_book.sheets(): - if self.__skip_hidden_sheets and sheet.visibility != 0: - continue - data_dict = self.read_sheet(sheet) - result.update(data_dict) - return result - - def read_sheet(self, native_sheet): - sheet = XLSheet( - native_sheet, - date_mode=self._native_book.datemode, - **self._keywords - ) - return {sheet.name: sheet.to_array()} - - def _get_book(self, on_demand=False): - xlrd_params = self._extract_xlrd_params() - xlrd_params["on_demand"] = on_demand - - if self._file_name: - xlrd_params["filename"] = self._file_name - elif self._file_stream: - file_content = self._file_stream.read() - xlrd_params["file_contents"] = file_content - elif self._file_content is not None: - xlrd_params["file_contents"] = self._file_content - else: - raise IOError("No valid file name or file content found.") - if self.__skip_hidden_row_column and self._file_type == "xls": - xlrd_params["formatting_info"] = True - if self.__detect_merged_cells: - xlrd_params["formatting_info"] = True + def get_xls_book(self, **xlrd_params): xls_book = xlrd.open_workbook(**xlrd_params) return xls_book @@ -226,6 +178,24 @@ class XLSBook(BookReader): return params +class XLSInFile(XLSReader): + def __init__(self, file_name, file_type, **keywords): + super().__init__(file_type, filename=file_name, **keywords) + + +class XLSInContent(XLSReader): + def __init__(self, file_content, file_type, **keywords): + super().__init__(file_type, file_contents=file_content, **keywords) + + +class XLSInMemory(XLSReader): + def __init__(self, file_stream, file_type, **keywords): + file_stream.seek(0) + super().__init__( + file_type, file_contents=file_stream.read(), **keywords + ) + + def xldate_to_python_date(value, date_mode): """ convert xl date to python date diff --git a/pyexcel_xls/xlsw.py b/pyexcel_xls/xlsw.py index 5dd1520..7d08c1b 100644 --- a/pyexcel_xls/xlsw.py +++ b/pyexcel_xls/xlsw.py @@ -4,16 +4,15 @@ The lower level xls file format handler using xlwt - :copyright: (c) 2016-2017 by Onni Software Ltd + :copyright: (c) 2016-2020 by Onni Software Ltd :license: New BSD License """ import datetime import xlrd from xlwt import XFStyle, Workbook - -from pyexcel_io.book import BookWriter from pyexcel_io.sheet import SheetWriter +from pyexcel_io.plugin_api.abstract_writer import IWriter DEFAULT_DATE_FORMAT = "DD/MM/YY" DEFAULT_TIME_FORMAT = "HH:MM:SS" @@ -27,8 +26,7 @@ class XLSheetWriter(SheetWriter): """ def set_sheet_name(self, name): - """Create a sheet - """ + """Create a sheet""" self._native_sheet = self._native_book.add_sheet(name) self.current_row = 0 @@ -68,32 +66,33 @@ class XLSheetWriter(SheetWriter): self.current_row += 1 -class XLSWriter(BookWriter): +class XLSWriter(IWriter): """ xls writer """ - def __init__(self): - BookWriter.__init__(self) - self.work_book = None - - def open( - self, file_name, encoding="ascii", style_compression=2, **keywords + def __init__( + self, + file_alike_object, + _, # file_type not used + encoding="ascii", + style_compression=2, + **keywords ): - BookWriter.open(self, file_name, **keywords) + self._file_alike_object = file_alike_object self.work_book = Workbook( style_compression=style_compression, encoding=encoding ) - def write(self, incoming_dict): - if incoming_dict: - BookWriter.write(self, incoming_dict) - else: - raise NotImplementedError(EMPTY_SHEET_NOT_ALLOWED) - def create_sheet(self, name): return XLSheetWriter(self.work_book, None, name) + def write(self, incoming_dict): + if incoming_dict: + IWriter.write(self, incoming_dict) + else: + raise NotImplementedError(EMPTY_SHEET_NOT_ALLOWED) + def close(self): """ This call actually save the file diff --git a/rnd_requirements.txt b/rnd_requirements.txt index 6d9386d..dffaa2d 100644 --- a/rnd_requirements.txt +++ b/rnd_requirements.txt @@ -1,2 +1,2 @@ -https://github.com/pyexcel/pyexcel-io/archive/master.zip +https://github.com/pyexcel/pyexcel-io/archive/dev.zip diff --git a/tests/test_bug_fixes.py b/tests/test_bug_fixes.py index d298374..f38abfc 100644 --- a/tests/test_bug_fixes.py +++ b/tests/test_bug_fixes.py @@ -6,6 +6,7 @@ import os import datetime +from unittest.mock import MagicMock, patch import pyexcel as pe from _compact import OrderedDict @@ -13,7 +14,6 @@ from pyexcel_xls import save_data from pyexcel_xls.xlsr import xldate_to_python_date from pyexcel_xls.xlsw import XLSWriter as Writer -from mock import patch from nose import SkipTest from nose.tools import eq_, raises @@ -79,13 +79,11 @@ def test_issue_16_file_stream_has_no_getvalue(): @patch("xlrd.open_workbook") def test_issue_18_encoding_override_isnt_passed(fake_open): - fake_open.return_value = None + fake_open.return_value = MagicMock(sheets=MagicMock(return_value=[])) test_encoding = "utf-32" - from pyexcel_xls.xlsr import XLSBook + from pyexcel_xls.xlsr import XLSInFile - book = XLSBook() - book.open("fake_file.xls", encoding_override=test_encoding) - book._get_book() + XLSInFile("fake_file.xls", "xls", encoding_override=test_encoding) keywords = fake_open.call_args[1] assert keywords["encoding_override"] == test_encoding @@ -112,7 +110,7 @@ def test_empty_book_pyexcel_issue_120(): """ https://github.com/pyexcel/pyexcel/issues/120 """ - writer = Writer() + writer = Writer("fake.xls", "xls") writer.write({}) diff --git a/tests/test_filter.py b/tests/test_filter.py index c9bc9e0..cd07d3c 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -1,8 +1,9 @@ import os -from nose.tools import eq_ from pyexcel_io import get_data, save_data +from nose.tools import eq_ + class TestFilter: def setUp(self): diff --git a/tests/test_writer.py b/tests/test_writer.py index e36454a..aef9a59 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -1,7 +1,7 @@ import os from base import PyexcelWriterBase, PyexcelHatWriterBase -from pyexcel_xls.xlsr import XLSBook as Reader +from pyexcel_xls.xlsr import XLSReader as Reader from pyexcel_xls.xlsw import XLSWriter as Writer @@ -13,12 +13,10 @@ class TestNativeXLSWriter: "Sheet3": [[u"X", u"Y", u"Z"], [1, 4, 7], [2, 5, 8], [3, 6, 9]], } self.testfile = "writer.xls" - writer = Writer() - writer.open(self.testfile) + writer = Writer(self.testfile, "xls") writer.write(self.content) writer.close() - reader = Reader() - reader.open(self.testfile) + reader = Reader("xls", filename=self.testfile) content = reader.read_all() for key in content.keys(): content[key] = list(content[key])