diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..0faea60 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,4 @@ +# These are supported funding model platforms + +github: chfw +patreon: chfw diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..8996445 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,8 @@ +With your PR, here is a check list: + +- [ ] Has test cases written? +- [ ] Has all code lines tested? +- [ ] Has `make format` been run? +- [ ] Please update CHANGELOG.yml(not CHANGELOG.rst) +- [ ] Has fair amount of documentation if your change is complex +- [ ] Agree on NEW BSD License for your contribution diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..3789494 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,20 @@ +name: lint + +on: [push, pull_request] + +jobs: + lint: + runs-on: ubuntu-latest + name: lint code + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v1 + with: + python-version: 3.8 + - name: lint + run: | + pip --use-deprecated=legacy-resolver install flake8 + pip --use-deprecated=legacy-resolver install -r tests/requirements.txt + flake8 --exclude=.moban.d,docs,setup.py --builtins=unicode,xrange,long . 
+ python setup.py checkdocs diff --git a/.github/workflows/moban-update.yml b/.github/workflows/moban-update.yml new file mode 100644 index 0000000..73a3aed --- /dev/null +++ b/.github/workflows/moban-update.yml @@ -0,0 +1,28 @@ +on: [push] + +jobs: + run_moban: + runs-on: ubuntu-latest + name: synchronize templates via moban + steps: + - uses: actions/checkout@v2 + with: + ref: ${{ github.head_ref }} + token: ${{ secrets.PAT }} + - name: Set up Python + uses: actions/setup-python@v1 + with: + python-version: '3.7' + - name: check changes + run: | + pip install moban gitfs2 pypifs moban-jinja2-github moban-ansible + moban + git status + git diff --exit-code + - name: Auto-commit + if: failure() + uses: stefanzweifel/git-auto-commit-action@v4 + with: + commit_message: >- + This is an auto-commit, updating project meta data, + such as changelog.rst, contributors.rst diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml new file mode 100644 index 0000000..e19f742 --- /dev/null +++ b/.github/workflows/pythonpackage.yml @@ -0,0 +1,30 @@ +name: Unit tests on ubuntu + +on: [push] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + max-parallel: 4 + matrix: + python-version: [3.6, 3.7, 3.8] + + steps: + - uses: actions/checkout@v1 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + - name: Lint with flake8 + run: | + make install_test format git-diff-check lint + - name: Test + run: | + pip install -r tests/requirements.txt + make diff --git a/.github/workflows/pythonpublish.yml b/.github/workflows/pythonpublish.yml new file mode 100644 index 0000000..9e7ec42 --- /dev/null +++ b/.github/workflows/pythonpublish.yml @@ -0,0 +1,26 @@ +name: Upload Python Package + +on: + release: + types: [created] + +jobs: + deploy: + runs-on: 
ubuntu-latest + steps: + - uses: actions/checkout@v1 + - name: Set up Python + uses: actions/setup-python@v1 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + - name: Build and publish + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + python setup.py sdist bdist_wheel + twine upload dist/* diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..801d2cd --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,35 @@ +name: Run unit tests on Windows and Mac + +on: [push, pull_request] + +jobs: + test: + strategy: + fail-fast: false + matrix: + python-version: [3.6, 3.7, 3.8, 3.9] + os: [macOs-latest, ubuntu-latest, windows-latest] + exclude: + - os: macOs-latest + python-version: 3.6 + + runs-on: ${{ matrix.os }} + name: run tests + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: install + run: | + pip --use-deprecated=legacy-resolver install -r requirements.txt + pip --use-deprecated=legacy-resolver install -r tests/requirements.txt + - name: test + run: | + pip freeze + nosetests --verbosity=3 --with-coverage --cover-package pyexcel_io --cover-package tests tests --with-doctest --doctest-extension=.rst README.rst docs/source pyexcel_io + - name: Upload coverage + uses: codecov/codecov-action@v1 + with: + name: ${{ matrix.os }} Python ${{ matrix.python-version }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 89f9f44..e8b12f9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,546 @@ -*.pyc -*~ +# moban hashes +.moban.hashes + +# Extra rules from https://github.com/github/gitignore/ +# Python rules +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / 
packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ .coverage -pyexcel*-info -build -dist -tmp.db -.idea/* +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# VirtualEnv rules +# Virtualenv +# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ +.Python +[Bb]in +[Ii]nclude +[Ll]ib +[Ll]ib64 +[Ll]ocal +[Ss]cripts +pyvenv.cfg +.venv +pip-selfcheck.json + +# Linux rules +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +# Windows rules +# Windows thumbnail cache files +Thumbs.db +Thumbs.db:encryptable +ehthumbs.db +ehthumbs_vista.db + +# Dump file +*.stackdump + +# Folder config file +[Dd]esktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msix +*.msm +*.msp + +# Windows shortcuts +*.lnk + +# macOS rules +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +# Emacs rules +# -*- mode: gitignore; -*- +*~ +\#*\# +/.emacs.desktop +/.emacs.desktop.lock +*.elc +auto-save-list +tramp +.\#* + +# Org-mode +.org-id-locations 
+*_archive + +# flymake-mode +*_flymake.* + +# eshell files +/eshell/history +/eshell/lastdir + +# elpa packages +/elpa/ + +# reftex files +*.rel + +# AUCTeX auto folder +/auto/ + +# cask packages +.cask/ +dist/ + +# Flycheck +flycheck_*.el + +# server auth directory +/server/ + +# projectiles files +.projectile + +# directory configuration +.dir-locals.el + +# network security +/network-security.data + + +# Vim rules +# Swap +[._]*.s[a-v][a-z] +!*.svg # comment out if you don't need vector files +[._]*.sw[a-p] +[._]s[a-rt-v][a-z] +[._]ss[a-gi-z] +[._]sw[a-p] + +# Session +Session.vim +Sessionx.vim + +# Temporary +.netrwhist +*~ +# Auto-generated tag files +tags +# Persistent undo +[._]*.un~ + +# JetBrains rules +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +# SublimeText rules +# Cache files for Sublime Text +*.tmlanguage.cache +*.tmPreferences.cache +*.stTheme.cache + +# Workspace files are user-specific +*.sublime-workspace + +# Project files should be checked into the repository, unless a significant +# proportion of contributors will probably not be using Sublime Text +# *.sublime-project + +# SFTP configuration file +sftp-config.json +sftp-config-alt*.json + +# Package control specific files +Package Control.last-run +Package Control.ca-list +Package Control.ca-bundle +Package Control.system-ca-bundle +Package Control.cache/ +Package Control.ca-certs/ +Package Control.merged-ca-bundle +Package Control.user-ca-bundle +oscrypto-ca-bundle.crt +bh_unicode_properties.cache + +# Sublime-github package stores a github token in this file +# https://packagecontrol.io/packages/sublime-github +GitHub.sublime-settings + +# KDevelop4 rules +*.kdev4 +.kdev4/ + +# Kate rules +# Swap Files # +.*.kate-swp +.swp.* + +# TextMate rules +*.tmproj +*.tmproject +tmtags + +# VisualStudioCode rules +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +*.code-workspace + +# Local History for Visual Studio Code +.history/ + +# Xcode rules +# Xcode +# +# gitignore contributors: 
remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore + +## User settings +xcuserdata/ + +## compatibility with Xcode 8 and earlier (ignoring not required starting Xcode 9) +*.xcscmblueprint +*.xccheckout + +## compatibility with Xcode 3 and earlier (ignoring not required starting Xcode 4) +build/ +DerivedData/ +*.moved-aside +*.pbxuser +!default.pbxuser +*.mode1v3 +!default.mode1v3 +*.mode2v3 +!default.mode2v3 +*.perspectivev3 +!default.perspectivev3 + +## Gcc Patch +/*.gcno + +# Eclipse rules +.metadata +bin/ +tmp/ +*.tmp +*.bak +*.swp +*~.nib +local.properties +.settings/ +.loadpath +.recommenders + +# External tool builders +.externalToolBuilders/ + +# Locally stored "Eclipse launch configurations" +*.launch + +# PyDev specific (Python IDE for Eclipse) +*.pydevproject + +# CDT-specific (C/C++ Development Tooling) +.cproject + +# CDT- autotools +.autotools + +# Java annotation processor (APT) +.factorypath + +# PDT-specific (PHP Development Tools) +.buildpath + +# sbteclipse plugin +.target + +# Tern plugin +.tern-project + +# TeXlipse plugin +.texlipse + +# STS (Spring Tool Suite) +.springBeans + +# Code Recommenders +.recommenders/ + +# Annotation Processing +.apt_generated/ +.apt_generated_test/ + +# Scala IDE specific (Scala & Java development for Eclipse) +.cache-main +.scala_dependencies +.worksheet + +# Uncomment this line if you wish to ignore the project description file. 
+# Typically, this file would be tracked if it contains build/dependency configurations: +#.project + +# TortoiseGit rules +# Project-level settings +/.tgitconfig + +# Tags rules +# Ignore tags created by etags, ctags, gtags (GNU global) and cscope +TAGS +.TAGS +!TAGS/ +tags +.tags +!tags/ +gtags.files +GTAGS +GRTAGS +GPATH +GSYMS +cscope.files +cscope.out +cscope.in.out +cscope.po.out + + +# remove moban hash dictionary +.moban.hashes diff --git a/.isort.cfg b/.isort.cfg new file mode 100644 index 0000000..d047cd9 --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,10 @@ +[settings] +line_length=79 +known_first_party=lml, pyexcel +known_third_party=nose +indent=' ' +multi_line_output=3 +length_sort=1 +default_section=FIRSTPARTY +no_lines_before=LOCALFOLDER +sections=FUTURE,STDLIB,FIRSTPARTY,THIRDPARTY,LOCALFOLDER diff --git a/.moban.d/custom_travis.yml.jj2 b/.moban.d/custom_travis.yml.jj2 new file mode 100644 index 0000000..9a18ae8 --- /dev/null +++ b/.moban.d/custom_travis.yml.jj2 @@ -0,0 +1,13 @@ +{% extends "travis.yml.jj2" %} +{%block extra_matrix %} +env: + - MINREQ=1 +{%endblock%} +{%block custom_python_versions%} +python: + - 3.8 + - 3.7 + - 3.6 +{%endblock%} +{%block pypi_deployment%} +{%endblock %} \ No newline at end of file diff --git a/.moban.d/docs/source/conf.py b/.moban.d/docs/source/custom_conf.py.jj2 similarity index 99% rename from .moban.d/docs/source/conf.py rename to .moban.d/docs/source/custom_conf.py.jj2 index 74678d7..ea8c266 100644 --- a/.moban.d/docs/source/conf.py +++ b/.moban.d/docs/source/custom_conf.py.jj2 @@ -12,4 +12,3 @@ def setup(app): {%endblock%} - diff --git a/.moban.d/docs/source/index.rst b/.moban.d/docs/source/index.rst.jj2 similarity index 94% rename from .moban.d/docs/source/index.rst rename to .moban.d/docs/source/index.rst.jj2 index 8b9c737..faf8d4d 100644 --- a/.moban.d/docs/source/index.rst +++ b/.moban.d/docs/source/index.rst.jj2 @@ -57,7 +57,9 @@ get_data(.., library='pyexcel-ods') ============= ======= ======== ======= 
======== ======== ======== `pyexcel-io`_ `xls`_ `xlsx`_ `ods`_ `ods3`_ `odsr`_ `xlsxw`_ ============= ======= ======== ======= ======== ======== ======== - 0.5.1 0.5.0 0.5.0 0.5.0 0.5.0 0.5.0 0.5.0 + 0.6.0+ 0.5.0+ 0.5.0+ 0.5.4 0.5.3 0.5.0+ 0.5.0+ + 0.5.10+ 0.5.0+ 0.5.0+ 0.5.4 0.5.3 0.5.0+ 0.5.0+ + 0.5.1+ 0.5.0+ 0.5.0+ 0.5.0+ 0.5.0+ 0.5.0+ 0.5.0+ 0.4.x 0.4.x 0.4.x 0.4.x 0.4.x 0.4.x 0.4.x 0.3.0+ 0.3.0+ 0.3.0 0.3.0+ 0.3.0+ 0.3.0 0.3.0 0.2.2+ 0.2.2+ 0.2.2+ 0.2.1+ 0.2.1+ 0.0.1 @@ -106,6 +108,7 @@ API .. autosummary:: :toctree: api/ + iget_data get_data save_data diff --git a/.moban.d/README.rst b/.moban.d/io_readme.rst.jj2 similarity index 100% rename from .moban.d/README.rst rename to .moban.d/io_readme.rst.jj2 diff --git a/.moban.d/setup.py b/.moban.d/io_setup.py.jj2 similarity index 99% rename from .moban.d/setup.py rename to .moban.d/io_setup.py.jj2 index 1e6bb8f..bf85899 100644 --- a/.moban.d/setup.py +++ b/.moban.d/io_setup.py.jj2 @@ -6,4 +6,3 @@ {%block pyexcel_extra_classifiers%} 'Programming Language :: Python :: Implementation :: PyPy' {%endblock%}} - diff --git a/.moban.d/tests/requirements.txt b/.moban.d/tests/requirements.txt deleted file mode 100644 index ea7c0e3..0000000 --- a/.moban.d/tests/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -{% extends 'tests/requirements.txt.jj2' %} -{%block extras %} -SQLAlchemy -pyexcel>=0.2.0 -pyexcel-xls>=0.1.0 -{%endblock%} diff --git a/.moban.yml b/.moban.yml index f773b2b..864d61c 100644 --- a/.moban.yml +++ b/.moban.yml @@ -1,26 +1,9 @@ -requires: - - type: git - url: https://github.com/moremoban/pypi-mobans - submodule: true - - https://github.com/pyexcel/pyexcel-mobans +overrides: "git://github.com/pyexcel/pyexcel-mobans!/mobanfile.yaml" configuration: - configuration_dir: "pyexcel-mobans:config" - template_dir: - - "pyexcel-mobans:templates" - - "pypi-mobans:templates" - - ".moban.d" configuration: pyexcel-io.yml targets: - - "docs/source/conf.py": "docs/source/conf.py" - - setup.py: setup.py - - .travis.yml: 
travis.yml.jj2 - - requirements.txt: requirements.txt.jj2 - - "tests/requirements.txt": "tests/requirements.txt" - - LICENSE: NEW_BSD_LICENSE.jj2 - - test.sh: test.script.jj2 - - test.bat: test.script.jj2 - - README.rst: README.rst - - "docs/source/index.rst": "docs/source/index.rst" - - output: CHANGELOG.rst - configuration: changelog.yml - template: CHANGELOG.rst.jj2 + - "docs/source/conf.py": "docs/source/custom_conf.py.jj2" + - setup.py: io_setup.py.jj2 + - README.rst: io_readme.rst.jj2 + - "docs/source/index.rst": "docs/source/index.rst.jj2" + - .gitignore: gitignore.jj2 diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 0000000..a379070 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,18 @@ +# .readthedocs.yml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/source/conf.py + +# Optionally build your docs in additional formats such as PDF +formats: + - pdf + +# Optionally set the version of Python and requirements required to build your docs +python: + version: 3.7 diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 8295c53..0000000 --- a/.travis.yml +++ /dev/null @@ -1,23 +0,0 @@ -sudo: false -language: python -notifications: - email: false -python: - - pypy-5.3.1 - - 3.7-dev - - 3.6 - - 3.5 - - 3.4 - - 2.7 -before_install: - - if [[ $TRAVIS_PYTHON_VERSION == "2.6" ]]; then pip install flake8==2.6.2; fi - - if [[ -f min_requirements.txt && "$MINREQ" -eq 1 ]]; then - mv min_requirements.txt requirements.txt ; - fi - - test ! -f rnd_requirements.txt || pip install --no-deps -r rnd_requirements.txt - - test ! 
-f rnd_requirements.txt || pip install -r rnd_requirements.txt ; - - pip install -r tests/requirements.txt -script: - - make test -after_success: - codecov diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 1553351..5cd5f17 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,11 +1,134 @@ Change log ================================================================================ +0.6.6 - 31.1.2022 +-------------------------------------------------------------------------------- + +**updated** + +#. `#112 `_: Log Empty Row + Warning instead 'print' + +0.6.5 - 08.10.2021 +-------------------------------------------------------------------------------- + +**updated** + +#. `#109 `_: enable ods3 to + have datetime + +0.6.4 - 31.10.2020 +-------------------------------------------------------------------------------- + +**updated** + +#. `#102 `_: skip columns from + imported excel sheet. + +0.6.3 - 12.10.2020 +-------------------------------------------------------------------------------- + +**fixed** + +#. `#96 `_: regression: unknown + file type shall trigger NoSupportingPluginFound + +**updated** + +#. extra dependencies uses 0.6.0 based plugins + +0.6.2 - 7.10.2020 +-------------------------------------------------------------------------------- + +**updated** + +#. `#94 `_: keep backward + compatibility for pyexcel-xls 0.4.1 + +0.6.1 - 7.10.2020 +-------------------------------------------------------------------------------- + +**removed** + +#. python 3.6 lower versions are no longer supported + +**updated** + +#. pyexcel-io plugin interface has been rewritten. PyInstaller user will be + impacted. please read 'Packaging with Pyinstaller' in the documentation. +#. new query set reader plugin. pyexcel<=0.6.4 has used intrusive way of getting + query set source done. it is against the plugin interface. + +**fixed** + +#. `#74 `_: handle zip files + which contain non-UTF-8 encoded files. + +**added** + +#. 
`#86 `_: allow trailing + options, get_data(...keep_trailing_empty_cells=True). + +0.5.20 - 17.7.2019 +-------------------------------------------------------------------------------- + +**updated** + +#. `#70 `_: when the given file + is a root directory, the error shall read it is not a file + +0.5.19 - 14.7.2019 +-------------------------------------------------------------------------------- + +**updated** + +#. `pyexcel#185 `_: handle stream + conversion if file type(html) needs string content then bytes to handle + +0.5.18 - 12.06.2019 +-------------------------------------------------------------------------------- + +**updated** + +#. `#69 `_: Force file + type(force_file_type) on write + +0.5.17 - 04.04.2019 +-------------------------------------------------------------------------------- + +**updated** + +#. `#68 `_: Raise IOError when + the data file does not exist + +0.5.16 - 19.03.2019 +-------------------------------------------------------------------------------- + +**updated** + +#. `#67 `_: fix conversion + issue for long type on python 2.7 for ods + +0.5.15 - 16.03.2019 +-------------------------------------------------------------------------------- + +**updated** + +#. `pyexcel-ods#33 `_: fix + integer comparison error on i586 + +0.5.14 - 21.02.2019 +-------------------------------------------------------------------------------- + +**updated** + +#. `#65 `_: add + tests/__init__.py because python2.7 setup.py test needs it + 0.5.13 - 12.02.2019 -------------------------------------------------------------------------------- -updated -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**updated** #. `#63 `_: Version 0.5.12 prevents xslx and ods plugin from being loaded @@ -13,19 +136,17 @@ updated 0.5.12 - 9.02.2019 -------------------------------------------------------------------------------- -updated -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**updated** #. 
`#60 `_: include tests in tar ball #. `#61 `_: enable python setup.py test -0.5.10 - 3.12.2018 +0.5.11 - 3.12.2018 -------------------------------------------------------------------------------- -updated -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**updated** #. `#59 `_: Please use scan_plugins_regex, which lml 0.7 complains about @@ -33,8 +154,7 @@ updated 0.5.10 - 27.11.2018 -------------------------------------------------------------------------------- -added -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**added** #. `#57 `_, long type will not be written in ods. please use string type. And if the integer is equal or @@ -45,8 +165,7 @@ added 0.5.9.1 - 30.08.2018 -------------------------------------------------------------------------------- -updated -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**updated** #. `#53 `_, upgrade lml dependency to at least 0.0.2 @@ -54,8 +173,7 @@ updated 0.5.9 - 23.08.2018 -------------------------------------------------------------------------------- -added -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**added** #. `pyexcel#148 `_, support force_file_type @@ -63,8 +181,7 @@ added 0.5.8 - 16.08.2018 -------------------------------------------------------------------------------- -added -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**added** #. `#49 `_, support additional options when detecting float values in csv format. default_float_nan, @@ -73,8 +190,7 @@ added 0.5.7 - 02.05.2018 -------------------------------------------------------------------------------- -fixed -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**fixed** #. `#48 `_, turn off pep 0515 #. 
`#47 `_, csv reader cannot @@ -83,8 +199,7 @@ fixed 0.5.6 - 11.01.2018 -------------------------------------------------------------------------------- -fixed -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**fixed** #. `#46 `_, expose `bulk_save` to developer @@ -92,8 +207,7 @@ fixed 0.5.5 - 23.12.2017 -------------------------------------------------------------------------------- -fixed -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**fixed** #. Issue `#45 `_, csv reader throws exception because google app engine does not support mmap. People who @@ -103,8 +217,7 @@ fixed 0.5.4 - 10.11.2017 -------------------------------------------------------------------------------- -updated -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**updated** #. PR `#44 `_, use unicodewriter for csvz writers. @@ -112,8 +225,7 @@ updated 0.5.3 - 23.10.2017 -------------------------------------------------------------------------------- -updated -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**updated** #. pyexcel `pyexcel#105 `_, remove gease from setup_requires, introduced by 0.5.2. @@ -122,8 +234,7 @@ updated 0.5.2 - 20.10.2017 -------------------------------------------------------------------------------- -added -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**added** #. `pyexcel#103 `_, include LICENSE file in MANIFEST.in, meaning LICENSE file will appear in the released @@ -132,8 +243,7 @@ added 0.5.1 - 02.09.2017 -------------------------------------------------------------------------------- -Fixed -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Fixed** #. `pyexcel-ods#25 `_, Unwanted dependency on pyexcel. 
@@ -141,13 +251,11 @@ Fixed 0.5.0 - 30.08.2017 -------------------------------------------------------------------------------- -Added -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Added** #. Collect all data type conversion codes as service.py. -Updated -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Updated** #. `#19 `_, use cString by default. For python, it will be a performance boost @@ -155,8 +263,7 @@ Updated 0.4.4 - 08.08.2017 -------------------------------------------------------------------------------- -Updated -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Updated** #. `#42 `_, raise exception if database table name does not match the sheet name @@ -164,8 +271,7 @@ Updated 0.4.3 - 29.07.2017 -------------------------------------------------------------------------------- -Updated -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Updated** #. `#41 `_, walk away gracefully when mmap is not available. @@ -173,8 +279,7 @@ Updated 0.4.2 - 05.07.2017 -------------------------------------------------------------------------------- -Updated -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Updated** #. `#37 `_, permanently fix the residue folder pyexcel by release all future releases in a clean clone. @@ -182,8 +287,7 @@ Updated 0.4.1 - 29.06.2017 -------------------------------------------------------------------------------- -Updated -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Updated** #. `#39 `_, raise exception when bulk save in django fails. Please `bulk_save=False` if you as the @@ -195,8 +299,7 @@ Updated 0.4.0 - 19.06.2017 -------------------------------------------------------------------------------- -Updated -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Updated** #. 
'built-in' as the value to the parameter 'library' as parameter to invoke pyexcel-io's built-in csv, tsv, csvz, tsvz, django and sql won't work. It is @@ -207,16 +310,14 @@ Updated handle are made sure to be closed. File close mechanism is enfored. #. iget_data function is introduced to cope with dangling file handle issue. -Removed -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Removed** #. Removed plugin loading code and lml is used instead 0.3.4 - 18.05.2017 -------------------------------------------------------------------------------- -Updated -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Updated** #. `#33 `_, handle mmap object differently given as file content. This issue has put in a priority to single @@ -232,16 +333,14 @@ Updated 0.3.3 - 30.03.2017 -------------------------------------------------------------------------------- -Updated -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Updated** #. `#31 `_, support pyinstaller 0.3.2 - 26.01.2017 -------------------------------------------------------------------------------- -Updated -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Updated** #. `#29 `_, change skip_empty_rows to False by default @@ -249,13 +348,11 @@ Updated 0.3.1 - 21.01.2017 -------------------------------------------------------------------------------- -Added -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Added** #. updated versions of extra packages -Updated -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Updated** #. `#23 `_, provide helpful message when old pyexcel plugin exists @@ -264,8 +361,7 @@ Updated 0.3.0 - 22.12.2016 -------------------------------------------------------------------------------- -Added -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Added** #. 
lazy loading of plugins. for example, pyexcel-xls is not entirely loaded until xls format is used at its first attempted reading or writing. Since it @@ -276,16 +372,14 @@ Added 0.2.6 - 21.12.2016 -------------------------------------------------------------------------------- -Updated -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Updated** #. `#24 `__, pass on batch_size 0.2.5 - 20.12.2016 -------------------------------------------------------------------------------- -Updated -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Updated** #. `#26 `__, performance issue with getting the number of columns. @@ -293,8 +387,7 @@ Updated 0.2.4 - 24.11.2016 -------------------------------------------------------------------------------- -Updated -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Updated** #. `#23 `__, Failed to convert long integer string in python 2 to its actual value @@ -302,8 +395,7 @@ Updated 0.2.3 - 16.09.2016 -------------------------------------------------------------------------------- -Added -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Added** #. `#21 `__, choose subset from data base tables for export @@ -313,16 +405,14 @@ Added 0.2.2 - 31.08.2016 -------------------------------------------------------------------------------- -Added -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Added** #. support pagination. two pairs: start_row, row_limit and start_column, column_limit help you deal with large files. #. `skip_empty_rows=True` was introduced. To include empty rows, put it to False. -Updated -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Updated** #. 
`#20 `__, pyexcel-io attempts to parse cell contents of 'infinity' as a float/int, crashes @@ -330,8 +420,7 @@ Updated 0.2.1 - 11.07.2016 -------------------------------------------------------------------------------- -Added -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Added** #. csv format: handle utf-16 encoded csv files. Potentially being able to decode other formats if correct "encoding" is provided @@ -339,8 +428,7 @@ Added supported #. support stdin as input stream and stdout as output stream -Updated -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Updated** #. Attention, user of pyexcel-io! No longer io stream validation is performed in python 3. The guideline is: io.StringIO for csv, tsv only, otherwise BytesIO @@ -352,8 +440,7 @@ Updated 0.2.0 - 01.06.2016 -------------------------------------------------------------------------------- -Added -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Added** #. autoload of pyexcel-io plugins #. auto detect `datetime`, `float` and `int`. Detection can be switched off by @@ -362,7 +449,6 @@ Added 0.1.0 - 17.01.2016 -------------------------------------------------------------------------------- -Added -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Added** #. yield key word to return generator as content diff --git a/CONTRIBUTORS.rst b/CONTRIBUTORS.rst new file mode 100644 index 0000000..45bb32d --- /dev/null +++ b/CONTRIBUTORS.rst @@ -0,0 +1,13 @@ + + +6 contributors +================================================================================ + +In alphabetical order: + +* `Craig Anderson `_ +* `John Vandenberg `_ +* `Stephen J. 
Fuhry `_ +* `Stephen Rauch `_ +* `Vincent Raspal `_ +* `Víctor Antonio Hernández Monroy `_ diff --git a/LICENSE b/LICENSE index e763169..8bb697c 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2015-2019 by Onni Software Ltd. and its contributors +Copyright (c) 2015-2022 by Onni Software Ltd. and its contributors All rights reserved. Redistribution and use in source and binary forms of the software as well @@ -13,7 +13,7 @@ that the following conditions are met: and/or other materials provided with the distribution. * Neither the name of 'pyexcel-io' nor the names of the contributors - may be used to endorse or promote products derived from this software + may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE AND DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND diff --git a/MANIFEST.in b/MANIFEST.in index 24eba87..cadad1b 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,8 +1,8 @@ include README.rst include LICENSE include CHANGELOG.rst +include CONTRIBUTORS.rst recursive-include tests * -include docs/source/* +recursive-include docs * include Makefile include test.sh - diff --git a/Makefile b/Makefile index 7f442b5..3e0ee51 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,16 @@ all: test -test: +test: lint bash test.sh -document: - sphinx-autogen -o docs/source/generated/ docs/source/*.rst - sphinx-build -b html docs/source/ docs/build/ +install_test: + pip install -r tests/requirements.txt + +lint: + bash lint.sh + +format: + bash format.sh + +git-diff-check: + git diff --exit-code diff --git a/README.rst b/README.rst index 30562fd..c4f43b7 100644 --- a/README.rst +++ b/README.rst @@ -3,20 +3,37 @@ pyexcel-io - Let you focus on data, instead of file formats ================================================================================ ..
image:: https://raw.githubusercontent.com/pyexcel/pyexcel.github.io/master/images/patreon.png - :target: https://www.patreon.com/pyexcel + :target: https://www.patreon.com/chfw -.. image:: https://api.bountysource.com/badge/team?team_id=288537 - :target: https://salt.bountysource.com/teams/chfw-pyexcel +.. image:: https://raw.githubusercontent.com/pyexcel/pyexcel-mobans/master/images/awesome-badge.svg + :target: https://awesome-python.com/#specific-formats-processing -.. image:: https://travis-ci.org/pyexcel/pyexcel-io.svg?branch=master - :target: http://travis-ci.org/pyexcel/pyexcel-io +.. image:: https://github.com/pyexcel/pyexcel-io/workflows/run_tests/badge.svg + :target: http://github.com/pyexcel/pyexcel-io/actions .. image:: https://codecov.io/gh/pyexcel/pyexcel-io/branch/master/graph/badge.svg :target: https://codecov.io/gh/pyexcel/pyexcel-io +.. image:: https://badge.fury.io/py/pyexcel-io.svg + :target: https://pypi.org/project/pyexcel-io + +.. image:: https://anaconda.org/conda-forge/pyexcel-io/badges/version.svg + :target: https://anaconda.org/conda-forge/pyexcel-io + +.. image:: https://pepy.tech/badge/pyexcel-io/month + :target: https://pepy.tech/project/pyexcel-io + +.. image:: https://anaconda.org/conda-forge/pyexcel-io/badges/downloads.svg + :target: https://anaconda.org/conda-forge/pyexcel-io + .. image:: https://img.shields.io/gitter/room/gitterHQ/gitter.svg :target: https://gitter.im/pyexcel/Lobby +.. image:: https://img.shields.io/static/v1?label=continuous%20templating&message=%E6%A8%A1%E7%89%88%E6%9B%B4%E6%96%B0&color=blue&style=flat-square + :target: https://moban.readthedocs.io/en/latest/#at-scale-continous-templating-for-open-source-projects + +.. image:: https://img.shields.io/static/v1?label=coding%20style&message=black&color=black&style=flat-square + :target: https://github.com/psf/black .. 
image:: https://readthedocs.org/projects/pyexcel-io/badge/?version=latest :target: http://pyexcel-io.readthedocs.org/en/latest/ @@ -24,7 +41,7 @@ Support the project ================================================================================ If your company has embedded pyexcel and its components into a revenue generating -product, please support me on `patreon `_ +product, please support me on github, `patreon `_ or `bounty source `_ to maintain the project and develop it further. @@ -43,6 +60,8 @@ Known constraints Fonts, colors and charts are not supported. +Nor to read password protected xls, xlsx and ods files. + Introduction ================================================================================ @@ -57,48 +76,47 @@ sqlalchemy supported databases. Its supported file formats are extended to cover .. table:: A list of file formats supported by external plugins - ======================== ======================= ================= ================== - Package name Supported file formats Dependencies Python versions - ======================== ======================= ================= ================== - `pyexcel-io`_ csv, csvz [#f1]_, tsv, 2.6, 2.7, 3.3, - tsvz [#f2]_ 3.4, 3.5, 3.6 - pypy - `pyexcel-xls`_ xls, xlsx(read only), `xlrd`_, same as above + ======================== ======================= ================= + Package name Supported file formats Dependencies + ======================== ======================= ================= + `pyexcel-io`_ csv, csvz [#f1]_, tsv, + tsvz [#f2]_ + `pyexcel-xls`_ xls, xlsx(read only), `xlrd`_, xlsm(read only) `xlwt`_ - `pyexcel-xlsx`_ xlsx `openpyxl`_ same as above - `pyexcel-ods3`_ ods `pyexcel-ezodf`_, 2.6, 2.7, 3.3, 3.4 - lxml 3.5, 3.6 - `pyexcel-ods`_ ods `odfpy`_ same as above - ======================== ======================= ================= ================== + `pyexcel-xlsx`_ xlsx `openpyxl`_ + `pyexcel-ods3`_ ods `pyexcel-ezodf`_, + lxml + `pyexcel-ods`_ ods `odfpy`_ + ======================== 
======================= ================= .. table:: Dedicated file reader and writers - ======================== ======================= ================= ================== - Package name Supported file formats Dependencies Python versions - ======================== ======================= ================= ================== - `pyexcel-xlsxw`_ xlsx(write only) `XlsxWriter`_ Python 2 and 3 - `pyexcel-xlsxr`_ xlsx(read only) lxml same as above - `pyexcel-odsr`_ read only for ods, fods lxml same as above - `pyexcel-htmlr`_ html(read only) lxml,html5lib same as above - ======================== ======================= ================= ================== + ======================== ======================= ================= + Package name Supported file formats Dependencies + ======================== ======================= ================= + `pyexcel-xlsxw`_ xlsx(write only) `XlsxWriter`_ + `pyexcel-libxlsxw`_ xlsx(write only) `libxlsxwriter`_ + `pyexcel-xlsxr`_ xlsx(read only) lxml + `pyexcel-xlsbr`_ xlsb(read only) pyxlsb + `pyexcel-odsr`_ read only for ods, fods lxml + `pyexcel-odsw`_ write only for ods loxun + `pyexcel-htmlr`_ html(read only) lxml,html5lib + `pyexcel-pdfr`_ pdf(read only) camelot + ======================== ======================= ================= -.. _pyexcel-io: https://github.com/pyexcel/pyexcel-io -.. _pyexcel-xls: https://github.com/pyexcel/pyexcel-xls -.. _pyexcel-xlsx: https://github.com/pyexcel/pyexcel-xlsx -.. _pyexcel-ods: https://github.com/pyexcel/pyexcel-ods -.. _pyexcel-ods3: https://github.com/pyexcel/pyexcel-ods3 -.. _pyexcel-odsr: https://github.com/pyexcel/pyexcel-odsr -.. _pyexcel-xlsxw: https://github.com/pyexcel/pyexcel-xlsxw -.. _pyexcel-xlsxr: https://github.com/pyexcel/pyexcel-xlsxr -.. _pyexcel-htmlr: https://github.com/pyexcel/pyexcel-htmlr +Plugin shopping guide +------------------------ -.. _xlrd: https://github.com/python-excel/xlrd -.. _xlwt: https://github.com/python-excel/xlwt -.. 
_openpyxl: https://bitbucket.org/openpyxl/openpyxl -.. _XlsxWriter: https://github.com/jmcnamara/XlsxWriter -.. _pyexcel-ezodf: https://github.com/pyexcel/pyexcel-ezodf -.. _odfpy: https://github.com/eea/odfpy +Since 2020, all pyexcel-io plugins have dropped the support for python versions +which are lower than 3.6. If you want to use any of those Python versions, please use pyexcel-io +and its plugins versions that are lower than 0.6.0. + + +Except csv files, xls, xlsx and ods files are a zip of a folder containing a lot of +xml files + +The dedicated readers for excel files can stream read In order to manage the list of plugins installed, you need to use pip to add or remove @@ -108,6 +126,32 @@ in your environment, you need to tell pyexcel which plugin to use per function c For example, pyexcel-ods and pyexcel-odsr, and you want to get_array to use pyexcel-odsr. You need to append get_array(..., library='pyexcel-odsr'). + + +.. _pyexcel-io: https://github.com/pyexcel/pyexcel-io +.. _pyexcel-xls: https://github.com/pyexcel/pyexcel-xls +.. _pyexcel-xlsx: https://github.com/pyexcel/pyexcel-xlsx +.. _pyexcel-ods: https://github.com/pyexcel/pyexcel-ods +.. _pyexcel-ods3: https://github.com/pyexcel/pyexcel-ods3 +.. _pyexcel-odsr: https://github.com/pyexcel/pyexcel-odsr +.. _pyexcel-odsw: https://github.com/pyexcel/pyexcel-odsw +.. _pyexcel-pdfr: https://github.com/pyexcel/pyexcel-pdfr + +.. _pyexcel-xlsxw: https://github.com/pyexcel/pyexcel-xlsxw +.. _pyexcel-libxlsxw: https://github.com/pyexcel/pyexcel-libxlsxw +.. _pyexcel-xlsxr: https://github.com/pyexcel/pyexcel-xlsxr +.. _pyexcel-xlsbr: https://github.com/pyexcel/pyexcel-xlsbr +.. _pyexcel-htmlr: https://github.com/pyexcel/pyexcel-htmlr + +.. _xlrd: https://github.com/python-excel/xlrd +.. _xlwt: https://github.com/python-excel/xlwt +.. _openpyxl: https://bitbucket.org/openpyxl/openpyxl +.. _XlsxWriter: https://github.com/jmcnamara/XlsxWriter +.. _pyexcel-ezodf: https://github.com/pyexcel/pyexcel-ezodf +.. 
_odfpy: https://github.com/eea/odfpy +.. _libxlsxwriter: http://libxlsxwriter.github.io/getting_started.html + + .. rubric:: Footnotes .. [#f1] zipped csv file @@ -161,7 +205,7 @@ Then install relevant development requirements: #. pip install -r tests/requirements.txt Once you have finished your changes, please provide test case(s), relevant documentation -and update CHANGELOG.rst. +and update changelog.yml .. note:: @@ -180,41 +224,20 @@ On Linux/Unix systems, please launch your tests like this:: $ make -On Windows systems, please issue this command:: +On Windows, please issue this command:: > test.bat -How to update test environment and update documentation ---------------------------------------------------------- -Additional steps are required: +Before you commit +------------------------------ -#. pip install moban -#. git clone https://github.com/moremoban/setupmobans.git # generic setup -#. git clone https://github.com/pyexcel/pyexcel-commons.git commons -#. make your changes in `.moban.d` directory, then issue command `moban` +Please run:: -What is pyexcel-commons ---------------------------------- + $ make format -Many information that are shared across pyexcel projects, such as: this developer guide, license info, etc. are stored in `pyexcel-commons` project. +so as to beautify your code otherwise your build may fail your unit test. -What is .moban.d ---------------------------------- - -`.moban.d` stores the specific meta data for the library. - -Acceptance criteria -------------------- - -#. Has Test cases written -#. Has all code lines tested -#. Passes all Travis CI builds -#. Has fair amount of documentation if your change is complex -#. run 'make format' so as to confirm the pyexcel organisation's coding style -#. Please update CHANGELOG.rst -#. Please add yourself to CONTRIBUTORS.rst -#. 
Agree on NEW BSD License for your contribution diff --git a/changelog.yml b/changelog.yml index d51819f..e9244a9 100644 --- a/changelog.yml +++ b/changelog.yml @@ -1,6 +1,99 @@ name: pyexcel-io organisation: pyexcel releases: +- changes: + - action: updated + details: + - "`#112`: Log Empty Row Warning instead 'print' " + version: 0.6.6 + date: 31.1.2022 +- changes: + - action: updated + details: + - "`#109`: enable ods3 to have datetime" + version: 0.6.5 + date: 08.10.2021 +- changes: + - action: updated + details: + - "`#102`: skip columns from imported excel sheet." + version: 0.6.4 + date: 31.10.2020 +- changes: + - action: fixed + details: + - "`#96`: regression: unknown file type shall trigger NoSupportingPluginFound" + - action: updated + details: + - "extra dependencies uses 0.6.0 based plugins" + version: 0.6.3 + date: 12.10.2020 +- changes: + - action: updated + details: + - "`#94`: keep backward compatibility for pyexcel-xls 0.4.1" + version: 0.6.2 + date: 7.10.2020 +- changes: + - action: removed + details: + - 'python 3.6 lower versions are no longer supported' + - action: updated + details: + - pyexcel-io plugin interface has been rewritten. PyInstaller user will be impacted. + please read 'Packaging with Pyinstaller' in the documentation. + - new query set reader plugin. pyexcel<=0.6.4 has used intrusive way of getting query set + source done. it is against the plugin interface. + - action: fixed + details: + - "`#74`: handle zip files which contain non-UTF-8 encoded files." + - action: added + details: + - "`#86`: allow trailing options, get_data(...keep_trailing_empty_cells=True)." 
+ version: 0.6.1 + date: 7.10.2020 +- changes: + - action: updated + details: + - '`#70`: when the given file is a root directory, the error shall read it is not a file' + version: 0.5.20 + date: 17.7.2019 +- changes: + - action: updated + details: + - '`pyexcel#185`: handle stream conversion if file type(html) needs string content then bytes to handle' + version: 0.5.19 + date: 14.7.2019 +- changes: + - action: updated + details: + - '`#69`: Force file type(force_file_type) on write' + version: 0.5.18 + date: 12.06.2019 +- changes: + - action: updated + details: + - '`#68`: Raise IOError when the data file does not exist' + version: 0.5.17 + date: 04.04.2019 +- changes: + - action: updated + details: + - '`#67`: fix conversion issue for long type on python 2.7 for ods' + version: 0.5.16 + date: 19.03.2019 +- changes: + - action: updated + details: + - '`pyexcel-ods#33`: fix integer comparision error on i586' + version: 0.5.15 + date: 16.03.2019 +- changes: + - action: updated + details: + - '`#65`: add tests/__init__.py because python2.7 setup.py test needs it' + version: 0.5.14 + date: 21.02.2019 - changes: - action: updated details: @@ -18,7 +111,7 @@ releases: - action: updated details: - '`#59`: Please use scan_plugins_regex, which lml 0.7 complains about' - version: 0.5.10 + version: 0.5.11 date: 3.12.2018 - changes: - action: added diff --git a/docs/source/api/pyexcel_io.get_data.rst b/docs/source/api/pyexcel_io.get_data.rst index 671a8d1..9f3c898 100644 --- a/docs/source/api/pyexcel_io.get_data.rst +++ b/docs/source/api/pyexcel_io.get_data.rst @@ -1,5 +1,5 @@ -pyexcel_io.get_data -=================== +pyexcel\_io.get\_data +===================== .. currentmodule:: pyexcel_io diff --git a/docs/source/api/pyexcel_io.iget_data.rst b/docs/source/api/pyexcel_io.iget_data.rst new file mode 100644 index 0000000..4b21126 --- /dev/null +++ b/docs/source/api/pyexcel_io.iget_data.rst @@ -0,0 +1,6 @@ +pyexcel\_io.iget\_data +====================== + +.. 
currentmodule:: pyexcel_io + +.. autofunction:: iget_data \ No newline at end of file diff --git a/docs/source/api/pyexcel_io.save_data.rst b/docs/source/api/pyexcel_io.save_data.rst index 791f311..fcece28 100644 --- a/docs/source/api/pyexcel_io.save_data.rst +++ b/docs/source/api/pyexcel_io.save_data.rst @@ -1,5 +1,5 @@ -pyexcel_io.save_data -==================== +pyexcel\_io.save\_data +====================== .. currentmodule:: pyexcel_io diff --git a/docs/source/common_parameters.rst b/docs/source/common_parameters.rst index 92926e5..70e01c9 100644 --- a/docs/source/common_parameters.rst +++ b/docs/source/common_parameters.rst @@ -2,9 +2,26 @@ Common parameters ================================================================================ +'library' option is added +-------------------------------------------------------------------------------- + +In order to have overlapping plugins co-exist, 'library' option is added to +get_data and save_data. + + +get_data only parameters +-------------------------------------------------------------------------------- + +keep_trailing_empty_cells +******************************************************************************** + +default: False + +If turned on, the return data will contain trailing empty cells. + auto_dectect_datetime --------------------------------------------------------------------------------- +******************************************************************************** The datetime formats are: @@ -14,11 +31,6 @@ The datetime formats are: Any other datetime formats will be thrown as ValueError -'library' option is added --------------------------------------------------------------------------------- - -In order to have overlapping plugins co-exit, 'library' option is added to -get_data and save_data. 
csv only parameters -------------------------------------------------------------------------------- diff --git a/docs/source/conf.py b/docs/source/conf.py index 29f2cd4..7d8e0cc 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -4,13 +4,11 @@ DESCRIPTION = ( 'format and to/from databases' + '' ) -# -*- coding: utf-8 -*- -# # Configuration file for the Sphinx documentation builder. # -# This file does only contain a selection of the most common options. For a -# full list see the documentation: -# http://www.sphinx-doc.org/en/master/config +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html # -- Path setup -------------------------------------------------------------- @@ -24,39 +22,24 @@ DESCRIPTION = ( # -- Project information ----------------------------------------------------- -project = u'pyexcel-io' -copyright = u'2015-2018 Onni Software Ltd.' -author = u'C.W.' - +project = 'pyexcel-io' +copyright = '2015-2022 Onni Software Ltd.' +author = 'C.W.' # The short X.Y version -version = u'0.5.11' +version = '0.6.6' # The full version, including alpha/beta/rc tags -release = u'0.5.11' - +release = '0.6.6' # -- General configuration --------------------------------------------------- -# If your documentation needs a minimal Sphinx version, state it here. -# -# needs_sphinx = '1.0' - # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.viewcode',] +extensions = [ 'sphinx.ext.autosummary', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.viewcode', 'sphinx.ext.autodoc',] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] -# The suffix(es) of source filenames. 
-# You can specify multiple suffix as a list of string: -# -# source_suffix = ['.rst', '.md'] -source_suffix = '.rst' - -# The master toctree document. -master_doc = 'index' - # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # @@ -69,9 +52,6 @@ language = 'en' # This pattern also affects html_static_path and html_extra_path. exclude_patterns = [] -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = None - # -- Options for HTML output ------------------------------------------------- @@ -80,107 +60,16 @@ pygments_style = None # html_theme = 'alabaster' -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# -# html_theme_options = {} - # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] -# Custom sidebar templates, must be a dictionary that maps document names -# to template names. -# -# The default sidebars (for documents that don't match any pattern) are -# defined by theme itself. Builtin themes are using these templates by -# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', -# 'searchbox.html']``. -# -# html_sidebars = {} - - -# -- Options for HTMLHelp output --------------------------------------------- - -# Output file base name for HTML help builder. -htmlhelp_basename = 'pyexcel-iodoc' - - -# -- Options for LaTeX output ------------------------------------------------ - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # - # 'papersize': 'letterpaper', - - # The font size ('10pt', '11pt' or '12pt'). - # - # 'pointsize': '10pt', - - # Additional stuff for the LaTeX preamble. 
- # - # 'preamble': '', - - # Latex figure (float) alignment - # - # 'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - (master_doc, 'pyexcel-io.tex', u'pyexcel-io Documentation', - u'Onni Software Ltd.', 'manual'), -] - - -# -- Options for manual page output ------------------------------------------ - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'pyexcel-io', u'pyexcel-io Documentation', - [author], 1) -] - - -# -- Options for Texinfo output ---------------------------------------------- - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - (master_doc, 'pyexcel-io', u'pyexcel-io Documentation', - author, 'pyexcel-io', 'One line description of project.', - 'Miscellaneous'), -] - - -# -- Options for Epub output ------------------------------------------------- - -# Bibliographic Dublin Core info. -epub_title = project - -# The unique identifier of the text. This can be a ISBN number -# or the project homepage. -# -# epub_identifier = '' - -# A unique identification for the text. -# -# epub_uid = '' - -# A list of files that should not be packed into the epub file. -epub_exclude_files = ['search.html'] - # -- Extension configuration ------------------------------------------------- # -- Options for intersphinx extension --------------------------------------- # Example configuration for intersphinx: refer to the Python standard library. 
-intersphinx_mapping = {'https://docs.python.org/': None} +intersphinx_mapping = {'https://docs.python.org/3/': None} # TODO: html_theme not configurable upstream html_theme = 'default' @@ -201,3 +90,4 @@ texinfo_documents = [ intersphinx_mapping.update({ 'pyexcel': ('http://pyexcel.readthedocs.io/en/latest/', None), }) +master_doc = "index" diff --git a/docs/source/extendedcsv.rst b/docs/source/extendedcsv.rst index 64ba764..79318fd 100644 --- a/docs/source/extendedcsv.rst +++ b/docs/source/extendedcsv.rst @@ -74,6 +74,8 @@ Continue from previous example:: :hide: >>> import os + >>> if os.path.exists("your_file.csv"): + ... os.unlink("your_file.csv") >>> os.unlink("your_file__Sheet 1__0.csv") >>> os.unlink("your_file__Sheet 2__1.csv") diff --git a/docs/source/extensions.rst b/docs/source/extensions.rst index 20456d2..f436227 100644 --- a/docs/source/extensions.rst +++ b/docs/source/extensions.rst @@ -1,14 +1,160 @@ -Working with xls, xlsx, and ods formats +Extend pyexcel-io for other excel or tabular formats ================================================================================ +You are welcome to extend pyexcel-io to read and write more tabular formats. +No. 1 rule, your plugin must have a prefix 'pyexcel_' in its module path. +For example, `pyexcel-xls` has 'pyexcel_xls' as its module path. Otherwise, +pyexcel-io will not load your plugin. + +On github, you will find two examples in `examples` folder. This section +explains its implementations to help you write yours. + .. note:: No longer, you will need to do explicit imports for pyexcel-io extensions. Instead, you install them and manage them via pip. -Work with physical file +Simple Reader for a yaml file +-------------------------------------------------------------------------------- + +Suppose we have a yaml file, containing a dictionary where the values are +two dimensional array. The task is to write a reader plugin to pyexcel-io so that +we can use get_data() to read yaml file out. + +.. 
literalinclude:: ../../examples/test.yaml + :language: yaml + +**Implement IReader** + +First, let's implement the reader interface: + +1. `content_array` attribute, is expected to be a list of `NamedContent` +2. `read_sheet` function, read sheet content by its index. +3. `close` function, to clean up any file handle + +.. literalinclude:: ../../examples/custom_yaml_reader.py + :language: python + :lines: 19-33 + +**Implement ISheet** + +`YourSingleSheet` makes this simple task complex in order to showcase its inner +workings. Two abstract functions require implementation: + +1. `row_iterator`: should return a row: either content array or content index as long as + `column_iterator` can use it to return the cell value. + +2. `column_iterator`: should iterate cell value from the given row. + +.. literalinclude:: ../../examples/custom_yaml_reader.py + :language: python + :lines: 8-16 + + +**Plug in pyexcel-io** + +Last thing is to register with pyexcel-io about your new reader. `relative_plugin_class_path` +means reference from current module, how to refer to `YourReader`. `locations` means +the physical presence of the data source: "file", "memory" or "content". "file" means +files on physical disk. "memory" means a file stream. "content" means a string buffer. +`stream_type` means the type of the stream: binary for BytesIO and text for StringIO. + +.. literalinclude:: ../../examples/custom_yaml_reader.py + :language: python + :lines: 36-41 + +Usually, this registration code is placed in __init__.py file at the top level of your +extension source tree. You can take a look at any pyexcel plugins for reference. + +**Test your reader** + +Let's run the following code and see if it works. + +.. literalinclude:: ../../examples/custom_yaml_reader.py + :language: python + :lines: 43-45 + + +You would see these in standard output: + +..
code-block:: bash + + $ python custom_yaml_reader.py + OrderedDict([('sheet 1', [[1, 2, 3], [2, 3, 4]]), ('sheet 2', [['A', 'B', 'C']])]) + +A writer to write content in yaml +-------------------------------------------------------------------------------- + +Now for the writer, let's write a pyexcel-io writer that writes a dictionary of +two dimensional arrays back into a yaml file seen above. + +**Implement IWriter** + +Two abstract functions are required: + +1. `create_sheet` creates a native sheet by sheet name, that understands how to code up the native sheet. Interestingly, it returns your sheet. +2. `close` function closes file handle if any. + +.. literalinclude:: ../../examples/custom_yaml_writer.py + :language: python + :lines: 18-30 + +**Implement ISheetWriter** + +It is imagined that you will have your own sheet writer. You simply need to figure +out how to write a row. Row by row write action was already written by `ISheetWriter`. + + +.. literalinclude:: ../../examples/custom_yaml_writer.py + :language: python + :lines: 7-15 + +**Plug in pyexcel-io** + +Like the reader plugin, we register a writer. + +.. literalinclude:: ../../examples/custom_yaml_writer.py + :language: python + :lines: 33-38 + +**Test It** + +Let's run the following code and please examine `mytest.yaml` yourself. + +.. literalinclude:: ../../examples/custom_yaml_writer.py + :language: python + :lines: 40-46 + +And you shall find a file named 'mytest.yaml': + + +.. code-block:: bash + + $ cat mytest.yaml + sheet 1: + - - 1 + - 3 + - 4 + - - 2 + - 4 + - 9 + sheet 2: + - - B + - C + - D + + +Other pyexcel-io plugins ----------------------------------------------------------------------------- +Get xls support + +..
code-block:: + + + $ pip install pyexcel-xls + + Here's what is needed:: >>> from pyexcel_io import save_data @@ -23,27 +169,6 @@ And you can also get the data back:: [[1, 2, 3]] -Work with memory file ------------------------------------------------------------------------------ - -Here is the sample code to work with memory file:: - - >>> from pyexcel_io.manager import get_io - >>> io = get_io("xls") - >>> data = [[1,2,3]] - >>> save_data(io, data, "xls") - -The difference is that you have mention file type if you use :meth:`pyexcel_io.save_data` - -And you can also get the data back:: - - >>> data = get_data(io, "xls") - >>> data['pyexcel_sheet1'] - [[1, 2, 3]] - -The same applies to :meth:`pyexcel_io.get_data`. - - Other formats ----------------------------------------------------------------------------- diff --git a/docs/source/index.rst b/docs/source/index.rst index af27e3a..baf6fd9 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -64,48 +64,47 @@ For individual excel file formats, please install them as you wish: .. 
table:: A list of file formats supported by external plugins - ======================== ======================= ================= ================== - Package name Supported file formats Dependencies Python versions - ======================== ======================= ================= ================== - `pyexcel-io`_ csv, csvz [#f1]_, tsv, 2.6, 2.7, 3.3, - tsvz [#f2]_ 3.4, 3.5, 3.6 - pypy - `pyexcel-xls`_ xls, xlsx(read only), `xlrd`_, same as above + ======================== ======================= ================= + Package name Supported file formats Dependencies + ======================== ======================= ================= + `pyexcel-io`_ csv, csvz [#f1]_, tsv, + tsvz [#f2]_ + `pyexcel-xls`_ xls, xlsx(read only), `xlrd`_, xlsm(read only) `xlwt`_ - `pyexcel-xlsx`_ xlsx `openpyxl`_ same as above - `pyexcel-ods3`_ ods `pyexcel-ezodf`_, 2.6, 2.7, 3.3, 3.4 - lxml 3.5, 3.6 - `pyexcel-ods`_ ods `odfpy`_ same as above - ======================== ======================= ================= ================== + `pyexcel-xlsx`_ xlsx `openpyxl`_ + `pyexcel-ods3`_ ods `pyexcel-ezodf`_, + lxml + `pyexcel-ods`_ ods `odfpy`_ + ======================== ======================= ================= .. 
table:: Dedicated file reader and writers - ======================== ======================= ================= ================== - Package name Supported file formats Dependencies Python versions - ======================== ======================= ================= ================== - `pyexcel-xlsxw`_ xlsx(write only) `XlsxWriter`_ Python 2 and 3 - `pyexcel-xlsxr`_ xlsx(read only) lxml same as above - `pyexcel-odsr`_ read only for ods, fods lxml same as above - `pyexcel-htmlr`_ html(read only) lxml,html5lib same as above - ======================== ======================= ================= ================== + ======================== ======================= ================= + Package name Supported file formats Dependencies + ======================== ======================= ================= + `pyexcel-xlsxw`_ xlsx(write only) `XlsxWriter`_ + `pyexcel-libxlsxw`_ xlsx(write only) `libxlsxwriter`_ + `pyexcel-xlsxr`_ xlsx(read only) lxml + `pyexcel-xlsbr`_ xlsb(read only) pyxlsb + `pyexcel-odsr`_ read only for ods, fods lxml + `pyexcel-odsw`_ write only for ods loxun + `pyexcel-htmlr`_ html(read only) lxml,html5lib + `pyexcel-pdfr`_ pdf(read only) camelot + ======================== ======================= ================= -.. _pyexcel-io: https://github.com/pyexcel/pyexcel-io -.. _pyexcel-xls: https://github.com/pyexcel/pyexcel-xls -.. _pyexcel-xlsx: https://github.com/pyexcel/pyexcel-xlsx -.. _pyexcel-ods: https://github.com/pyexcel/pyexcel-ods -.. _pyexcel-ods3: https://github.com/pyexcel/pyexcel-ods3 -.. _pyexcel-odsr: https://github.com/pyexcel/pyexcel-odsr -.. _pyexcel-xlsxw: https://github.com/pyexcel/pyexcel-xlsxw -.. _pyexcel-xlsxr: https://github.com/pyexcel/pyexcel-xlsxr -.. _pyexcel-htmlr: https://github.com/pyexcel/pyexcel-htmlr +Plugin shopping guide +------------------------ -.. _xlrd: https://github.com/python-excel/xlrd -.. _xlwt: https://github.com/python-excel/xlwt -.. _openpyxl: https://bitbucket.org/openpyxl/openpyxl -.. 
_XlsxWriter: https://github.com/jmcnamara/XlsxWriter -.. _pyexcel-ezodf: https://github.com/pyexcel/pyexcel-ezodf -.. _odfpy: https://github.com/eea/odfpy +Since 2020, all pyexcel-io plugins have dropped the support for python versions +which are lower than 3.6. If you want to use any of those Python versions, please use pyexcel-io +and its plugins versions that are lower than 0.6.0. + + +Except csv files, xls, xlsx and ods files are a zip of a folder containing a lot of +xml files + +The dedicated readers for excel files can stream read In order to manage the list of plugins installed, you need to use pip to add or remove @@ -115,6 +114,32 @@ in your environment, you need to tell pyexcel which plugin to use per function c For example, pyexcel-ods and pyexcel-odsr, and you want to get_array to use pyexcel-odsr. You need to append get_array(..., library='pyexcel-odsr'). + + +.. _pyexcel-io: https://github.com/pyexcel/pyexcel-io +.. _pyexcel-xls: https://github.com/pyexcel/pyexcel-xls +.. _pyexcel-xlsx: https://github.com/pyexcel/pyexcel-xlsx +.. _pyexcel-ods: https://github.com/pyexcel/pyexcel-ods +.. _pyexcel-ods3: https://github.com/pyexcel/pyexcel-ods3 +.. _pyexcel-odsr: https://github.com/pyexcel/pyexcel-odsr +.. _pyexcel-odsw: https://github.com/pyexcel/pyexcel-odsw +.. _pyexcel-pdfr: https://github.com/pyexcel/pyexcel-pdfr + +.. _pyexcel-xlsxw: https://github.com/pyexcel/pyexcel-xlsxw +.. _pyexcel-libxlsxw: https://github.com/pyexcel/pyexcel-libxlsxw +.. _pyexcel-xlsxr: https://github.com/pyexcel/pyexcel-xlsxr +.. _pyexcel-xlsbr: https://github.com/pyexcel/pyexcel-xlsbr +.. _pyexcel-htmlr: https://github.com/pyexcel/pyexcel-htmlr + +.. _xlrd: https://github.com/python-excel/xlrd +.. _xlwt: https://github.com/python-excel/xlwt +.. _openpyxl: https://bitbucket.org/openpyxl/openpyxl +.. _XlsxWriter: https://github.com/jmcnamara/XlsxWriter +.. _pyexcel-ezodf: https://github.com/pyexcel/pyexcel-ezodf +.. _odfpy: https://github.com/eea/odfpy +.. 
_libxlsxwriter: http://libxlsxwriter.github.io/getting_started.html + + .. rubric:: Footnotes .. [#f1] zipped csv file @@ -138,6 +163,7 @@ get_data(.., library='pyexcel-ods') ============= ======= ======== ======= ======== ======== ======== `pyexcel-io`_ `xls`_ `xlsx`_ `ods`_ `ods3`_ `odsr`_ `xlsxw`_ ============= ======= ======== ======= ======== ======== ======== + 0.6.0+ 0.5.0+ 0.5.0+ 0.5.4 0.5.3 0.5.0+ 0.5.0+ 0.5.10+ 0.5.0+ 0.5.0+ 0.5.4 0.5.3 0.5.0+ 0.5.0+ 0.5.1+ 0.5.0+ 0.5.0+ 0.5.0+ 0.5.0+ 0.5.0+ 0.5.0+ 0.4.x 0.4.x 0.4.x 0.4.x 0.4.x 0.4.x 0.4.x @@ -188,6 +214,7 @@ API .. autosummary:: :toctree: api/ + iget_data get_data save_data diff --git a/docs/source/pagination.rst b/docs/source/pagination.rst index 51eeafe..cb9181a 100644 --- a/docs/source/pagination.rst +++ b/docs/source/pagination.rst @@ -60,3 +60,9 @@ Obvious, you could do both at the same time: The pagination support is available across all pyexcel-io plugins. +.. testcode:: + :hide: + + >>> import os + >>> if os.path.exists("your_file.csv"): + ... os.unlink("your_file.csv") diff --git a/docs/source/plaincsv.rst b/docs/source/plaincsv.rst index fe8f96c..c009627 100644 --- a/docs/source/plaincsv.rst +++ b/docs/source/plaincsv.rst @@ -153,7 +153,7 @@ Here is an example to write a sentence of "Shui Dial Getou"[#f2] into a csv file .. code-block:: python - >>> content = [[u'人有悲歡離合', u'月有陰晴圓缺']] + >>> content = [['löyly', 'löyly']] >>> test_file = "test-utf8-BOM.csv" >>> save_data(test_file, content, encoding="utf-8-sig", lineterminator="\n") @@ -172,4 +172,5 @@ When you read it back you will have to specify encoding too. 
>>> import os >>> os.unlink("your_file.csv") + >>> os.unlink("test-utf16-encoding.csv") >>> os.unlink(test_file) diff --git a/docs/source/pyinstaller.rst b/docs/source/pyinstaller.rst index 531ccc7..2db1c36 100644 --- a/docs/source/pyinstaller.rst +++ b/docs/source/pyinstaller.rst @@ -1,6 +1,27 @@ Packaging with PyInstaller ================================================================================ +With pyexcel-io v0.6.0, the way to package it has been changed because +of the plugin interface update. + +Built-in plugins for pyexcel-io +--------------------------------- + +In order to package every built-in plugin of pyexcel-io, you need to specify:: + + --hidden-import pyexcel_io.readers.csv_in_file + --hidden-import pyexcel_io.readers.csv_in_memory + --hidden-import pyexcel_io.readers.csv_content + --hidden-import pyexcel_io.readers.csvz + --hidden-import pyexcel_io.writers.csv_in_file + --hidden-import pyexcel_io.writers.csv_in_memory + --hidden-import pyexcel_io.writers.csvz_writer + --hidden-import pyexcel_io.database.importers.django + --hidden-import pyexcel_io.database.importers.sqlalchemy + --hidden-import pyexcel_io.database.exporters.django + --hidden-import pyexcel_io.database.exporters.sqlalchemy + + +With pyexcel-io v0.4.0, the way to package it has been changed because it uses lml for all plugins.
@@ -14,9 +35,9 @@ In order to package every built-in plugins of pyexcel-io, you need to specify:: --hidden-import pyexcel_io.readers.tsv --hidden-import pyexcel_io.readers.tsvz --hidden-import pyexcel_io.writers.csvw - --hidden-import pyexcel_io.readers.csvz - --hidden-import pyexcel_io.readers.tsv - --hidden-import pyexcel_io.readers.tsvz + --hidden-import pyexcel_io.writers.csvz + --hidden-import pyexcel_io.writers.tsv + --hidden-import pyexcel_io.writers.tsvz --hidden-import pyexcel_io.database.importers.django --hidden-import pyexcel_io.database.importers.sqlalchemy --hidden-import pyexcel_io.database.exporters.django diff --git a/docs/source/renderer.rst b/docs/source/renderer.rst index a10631e..2b5ab4d 100644 --- a/docs/source/renderer.rst +++ b/docs/source/renderer.rst @@ -1,5 +1,4 @@ Rendering(Formatting) the data - ================================================================================ You might want to do custom rendering on your data obtained. `row_renderer` was @@ -46,3 +45,10 @@ And you may want use row_renderer to customize it to string: >>> data = get_data("your_file.csv", row_renderer=my_renderer) >>> data['your_file.csv'] [['1', '21', '31'], ['2', '22', '32'], ['3', '23', '33']] + +.. testcode:: + :hide: + + >>> import os + >>> if os.path.exists("your_file.csv"): + ... 
os.unlink("your_file.csv") diff --git a/examples/custom_yaml_reader.py b/examples/custom_yaml_reader.py new file mode 100644 index 0000000..ef68606 --- /dev/null +++ b/examples/custom_yaml_reader.py @@ -0,0 +1,45 @@ +import yaml +from pyexcel_io import get_data +from pyexcel_io.sheet import NamedContent +from pyexcel_io.plugins import IOPluginInfoChainV2 +from pyexcel_io.plugin_api import ISheet, IReader + + +class YourSingleSheet(ISheet): + def __init__(self, your_native_sheet): + self.two_dimensional_array = your_native_sheet + + def row_iterator(self): + yield from self.two_dimensional_array + + def column_iterator(self, row): + yield from row + + +class YourReader(IReader): + def __init__(self, file_name, file_type, **keywords): + self.file_handle = open(file_name, "r") + self.native_book = yaml.load(self.file_handle) + self.content_array = [ + NamedContent(key, values) + for key, values in self.native_book.items() + ] + + def read_sheet(self, sheet_index): + two_dimensional_array = self.content_array[sheet_index].payload + return YourSingleSheet(two_dimensional_array) + + def close(self): + self.file_handle.close() + + +IOPluginInfoChainV2(__name__).add_a_reader( + relative_plugin_class_path="YourReader", + locations=["file"], + file_types=["yaml"], + stream_type="text", +) + +if __name__ == "__main__": + data = get_data("test.yaml") + print(data) diff --git a/examples/custom_yaml_writer.py b/examples/custom_yaml_writer.py new file mode 100644 index 0000000..1dc0cad --- /dev/null +++ b/examples/custom_yaml_writer.py @@ -0,0 +1,46 @@ +import yaml +from pyexcel_io import save_data +from pyexcel_io.plugins import IOPluginInfoChainV2 +from pyexcel_io.plugin_api import IWriter, ISheetWriter + + +class MySheetWriter(ISheetWriter): + def __init__(self, sheet_reference): + self.native_sheet = sheet_reference + + def write_row(self, data_row): + self.native_sheet.append(data_row) + + def close(self): + pass + + +class MyWriter(IWriter): + def __init__(self, file_name, 
file_type, **keywords): + self.file_name = file_name + self.content = {} + + def create_sheet(self, name): + array = [] + self.content[name] = array + return MySheetWriter(array) + + def close(self): + with open(self.file_name, "w") as f: + f.write(yaml.dump(self.content, default_flow_style=False)) + + +IOPluginInfoChainV2(__name__).add_a_writer( + relative_plugin_class_path="MyWriter", + locations=["file"], + file_types=["yaml"], + stream_type="text", +) + +if __name__ == "__main__": + data_dict = { + "sheet 1": [[1, 3, 4], [2, 4, 9]], + "sheet 2": [["B", "C", "D"]], + } + + save_data("mytest.yaml", data_dict) diff --git a/examples/test.yaml b/examples/test.yaml new file mode 100644 index 0000000..1e02d55 --- /dev/null +++ b/examples/test.yaml @@ -0,0 +1,11 @@ +sheet 1: +- - 1 + - 2 + - 3 +- - 2 + - 3 + - 4 +sheet 2: +- - A + - B + - C diff --git a/format.sh b/format.sh new file mode 100644 index 0000000..73a38db --- /dev/null +++ b/format.sh @@ -0,0 +1,3 @@ +isort $(find pyexcel_io -name "*.py"|xargs echo) $(find tests -name "*.py"|xargs echo) +black -l 79 pyexcel_io +black -l 79 tests diff --git a/lint.sh b/lint.sh new file mode 100644 index 0000000..d31eeaa --- /dev/null +++ b/lint.sh @@ -0,0 +1,2 @@ +pip install flake8 +flake8 --exclude=.moban.d,docs,setup.py --builtins=unicode,xrange,long . 
&& python setup.py checkdocs \ No newline at end of file diff --git a/pyexcel-io.yml b/pyexcel-io.yml index 894ea2d..7dad5e1 100644 --- a/pyexcel-io.yml +++ b/pyexcel-io.yml @@ -1,19 +1,27 @@ overrides: "pyexcel.yaml" -name: "pyexcel-io" +project: "pyexcel-io" +name: pyexcel-io nick_name: io -version: 0.5.13 -current_version: 0.5.13 -release: 0.5.13 +version: 0.6.6 +current_version: 0.6.6 +release: 0.6.6 +copyright_year: 2015-2022 +moban_command: false +is_on_conda: true dependencies: - - ordereddict;python_version<"2.7" - lml>=0.0.4 +test_dependencies: + - pyexcel + - pyexcel-xls==0.5.9 + - SQLAlchemy + - pyexcel-xlsxw extra_dependencies: - xls: - - pyexcel-xls>=0.5.0 + - pyexcel-xls>=0.6.0 - xlsx: - - pyexcel-xlsx>=0.5.0 + - pyexcel-xlsx>=0.6.0 - ods: - - pyexcel-ods3>=0.5.0 + - pyexcel-ods3>=0.6.0 keywords: - API - tsv @@ -22,4 +30,12 @@ keywords: - csvz - django - sqlalchemy +sphinx_extensions: + - sphinx.ext.autosummary + - sphinx.ext.doctest + - sphinx.ext.intersphinx + - sphinx.ext.viewcode + - sphinx.ext.autodoc description: A python library to read and write structured data in csv, zipped csv format and to/from databases +python_requires: ">=3.6" +min_python_version: "3.6" diff --git a/pyexcel_io/__init__.py b/pyexcel_io/__init__.py index b50c3e3..f215e48 100644 --- a/pyexcel_io/__init__.py +++ b/pyexcel_io/__init__.py @@ -4,17 +4,18 @@ Uniform interface for reading/writing different excel file formats - :copyright: (c) 2014-2017 by Onni Software Ltd. + :copyright: (c) 2014-2022 by Onni Software Ltd. 
:license: New BSD License, see LICENSE for more details """ import logging + +import pyexcel_io.plugins as plugins + +from .io import get_data, iget_data, save_data # noqa from ._compact import NullHandler logging.getLogger(__name__).addHandler(NullHandler()) # noqa -from .io import get_data, iget_data, save_data # noqa -import pyexcel_io.plugins as plugins - BLACK_LIST = [__name__, "pyexcel_webio", "pyexcel_text"] WHITE_LIST = [ @@ -25,7 +26,5 @@ WHITE_LIST = [ PREFIX_PATTERN = "^pyexcel_.*$" plugins.load_plugins( - PREFIX_PATTERN, - __path__, # noqa: F821 - BLACK_LIST, - WHITE_LIST) + PREFIX_PATTERN, __path__, BLACK_LIST, WHITE_LIST # noqa: F821 +) diff --git a/pyexcel_io/_compact.py b/pyexcel_io/_compact.py index b0459f2..e9827ba 100644 --- a/pyexcel_io/_compact.py +++ b/pyexcel_io/_compact.py @@ -4,29 +4,13 @@ Compatibles - :copyright: (c) 2014-2017 by Onni Software Ltd. + :copyright: (c) 2014-2022 by Onni Software Ltd. :license: New BSD License, see LICENSE for more details """ -# flake8: noqa -# pylint: disable=import-error -# pylint: disable=invalid-name -# pylint: disable=too-few-public-methods -# pylint: disable=ungrouped-imports -# pylint: disable=redefined-variable-type import sys -import types import logging - -PY2 = sys.version_info[0] == 2 -PY3_ABOVE = sys.version_info[0] >= 3 -PY26 = PY2 and sys.version_info[1] < 7 -PY27 = PY2 and sys.version_info[1] == 7 -PY27_ABOVE = PY27 or PY3_ABOVE - -if PY26: - from ordereddict import OrderedDict -else: - from collections import OrderedDict +from io import BytesIO, StringIO # noqa: F401 +from collections import OrderedDict # noqa: F401 try: from logging import NullHandler @@ -37,28 +21,13 @@ except ImportError: pass -if PY2: - from cStringIO import StringIO - from cStringIO import StringIO as BytesIO - - text_type = unicode - irange = xrange - - class Iterator(object): - def next(self): - return type(self).__next__(self) - - -else: - from io import StringIO, BytesIO - - text_type = str - Iterator = object - 
irange = range +text_type = str +irange = range +PY2 = sys.version[0] == 2 def isstream(instance): - """ check if a instance is a stream """ + """check if a instance is a stream""" try: import mmap @@ -72,11 +41,4 @@ def isstream(instance): def is_string(atype): """find out if a type is str or not""" - if atype == str: - return True - - elif PY2: - if atype == unicode: - return True - - return False + return atype == str diff --git a/pyexcel_io/book.py b/pyexcel_io/book.py index 24e9b96..1948270 100644 --- a/pyexcel_io/book.py +++ b/pyexcel_io/book.py @@ -4,13 +4,22 @@ The io interface to file extensions - :copyright: (c) 2014-2017 by Onni Software Ltd. + :copyright: (c) 2014-2022 by Onni Software Ltd. :license: New BSD License, see LICENSE for more details """ +import warnings + import pyexcel_io.manager as manager -from pyexcel_io._compact import OrderedDict, isstream, PY2 +from pyexcel_io._compact import OrderedDict, isstream + from .constants import MESSAGE_ERROR_03, MESSAGE_WRONG_IO_INSTANCE +DEPRECATED_SINCE_0_6_0 = ( + "Deprecated since v0.6.0! " + + "Although backward compatibility is preserved, " + + "it is recommended to upgrade to get new features." 
+) + class RWInterface(object): """ @@ -20,6 +29,7 @@ class RWInterface(object): stream_type = None def __init__(self): + warnings.warn(DEPRECATED_SINCE_0_6_0) self._file_type = None def open(self, file_name, **keywords): @@ -85,26 +95,15 @@ class BookReader(RWInterface): keywords are passed on to individual readers """ if isstream(file_stream): - if PY2: - if hasattr(file_stream, "seek"): - file_stream.seek(0) - else: - # python 2 - # Hei zipfile in odfpy would do a seek - # but stream from urlib cannot do seek - file_stream = _convert_content_to_stream( - file_stream.read(), self._file_type - ) - else: - from io import UnsupportedOperation + from io import UnsupportedOperation - try: - file_stream.seek(0) - except UnsupportedOperation: - # python 3 - file_stream = _convert_content_to_stream( - file_stream.read(), self._file_type - ) + try: + file_stream.seek(0) + except UnsupportedOperation: + # python 3 + file_stream = _convert_content_to_stream( + file_stream.read(), self._file_type + ) self._file_stream = file_stream self._keywords = keywords @@ -231,6 +230,17 @@ class BookWriter(RWInterface): def _convert_content_to_stream(file_content, file_type): stream = manager.get_io(file_type) + target_content_type = manager.get_io_type(file_type) + needs_encode = target_content_type == "bytes" and not isinstance( + file_content, bytes + ) + needs_decode = target_content_type == "string" and isinstance( + file_content, bytes + ) + if needs_encode: + file_content = file_content.encode("utf-8") + elif needs_decode: + file_content = file_content.decode("utf-8") stream.write(file_content) stream.seek(0) return stream diff --git a/pyexcel_io/constants.py b/pyexcel_io/constants.py index 24b838a..25e2823 100644 --- a/pyexcel_io/constants.py +++ b/pyexcel_io/constants.py @@ -4,7 +4,7 @@ Constants appeared in pyexcel - :copyright: (c) 2014-2017 by Onni Software Ltd. + :copyright: (c) 2014-2022 by Onni Software Ltd. 
:license: New BSD License """ # flake8: noqa @@ -16,6 +16,7 @@ MESSAGE_INVALID_PARAMETERS = "Invalid parameters" MESSAGE_ERROR_02 = "No content, file name. Nothing is given" MESSAGE_ERROR_03 = "cannot handle unknown content" MESSAGE_WRONG_IO_INSTANCE = "Wrong io instance is passed for your file format." +MESSAGE_FILE_NAME_SHOULD_BE_STRING = "file_name should be a string" MESSAGE_CANNOT_WRITE_STREAM_FORMATTER = ( "Cannot write content of file type %s to stream" ) @@ -31,6 +32,8 @@ MESSAGE_CANNOT_READ_FILE_TYPE_FORMATTER = ( MESSAGE_LOADING_FORMATTER = ( "The plugin for file type %s is not installed. Please install %s" ) +MESSAGE_NOT_FILE_FORMATTER = "%s is not a file" +MESSAGE_FILE_DOES_NOT_EXIST = "%s does not exist" MESSAGE_EMPTY_ARRAY = "One empty row is found" MESSAGE_IGNORE_ROW = "One row is ignored" MESSAGE_DB_EXCEPTION = """ @@ -44,8 +47,12 @@ FILE_FORMAT_ODS = "ods" FILE_FORMAT_XLS = "xls" FILE_FORMAT_XLSX = "xlsx" FILE_FORMAT_XLSM = "xlsm" +FILE_FORMAT_XLSB = "xlsb" +FILE_FORMAT_HTML = "html" +FILE_FORMAT_PDF = "pdf" DB_SQL = "sql" DB_DJANGO = "django" +DB_QUERYSET = "queryset" KEYWORD_TSV_DIALECT = "excel-tab" KEYWORD_LINE_TERMINATOR = "lineterminator" diff --git a/pyexcel_io/database/__init__.py b/pyexcel_io/database/__init__.py index e015023..b7b4c6d 100644 --- a/pyexcel_io/database/__init__.py +++ b/pyexcel_io/database/__init__.py @@ -4,23 +4,30 @@ database data importer and exporter - :copyright: (c) 2014-2017 by Onni Software Ltd. + :copyright: (c) 2014-2022 by Onni Software Ltd. 
:license: New BSD License, see LICENSE for more details """ -from pyexcel_io.plugins import IOPluginInfoChain -from pyexcel_io.constants import DB_DJANGO, DB_SQL +from pyexcel_io.plugins import IOPluginInfoChainV2 +from pyexcel_io.constants import DB_SQL, DB_DJANGO, DB_QUERYSET - -IOPluginInfoChain(__name__).add_a_reader( +IOPluginInfoChainV2(__name__).add_a_reader( + relative_plugin_class_path="exporters.queryset.QueryReader", + locations=["file", "memory", "content"], + file_types=[DB_QUERYSET], +).add_a_reader( relative_plugin_class_path="exporters.django.DjangoBookReader", + locations=["file", "memory", "content"], + file_types=[DB_DJANGO], +).add_a_writer( + relative_plugin_class_path="importers.django.DjangoBookWriter", + locations=["file", "content", "memory"], file_types=[DB_DJANGO], ).add_a_reader( relative_plugin_class_path="exporters.sqlalchemy.SQLBookReader", + locations=["file", "memory", "content"], file_types=[DB_SQL], -).add_a_writer( - relative_plugin_class_path="importers.django.DjangoBookWriter", - file_types=[DB_DJANGO], ).add_a_writer( relative_plugin_class_path="importers.sqlalchemy.SQLBookWriter", + locations=["file", "content", "memory"], file_types=[DB_SQL], ) diff --git a/pyexcel_io/database/common.py b/pyexcel_io/database/common.py index da46acd..dd0f98d 100644 --- a/pyexcel_io/database/common.py +++ b/pyexcel_io/database/common.py @@ -4,31 +4,13 @@ Common classes shared among database importers and exporters - :copyright: (c) 2014-2017 by Onni Software Ltd. + :copyright: (c) 2014-2022 by Onni Software Ltd. 
:license: New BSD License, see LICENSE for more details """ -from pyexcel_io.book import BookReader - - -class DbExporter(BookReader): - """ Transcode the book reader interface to db interface """ - - def open(self, file_name, **keywords): - self.export_tables(self, file_name, **keywords) - - def open_stream(self, file_stream, **keywords): - self.export_tables(self, file_stream, **keywords) - - def open_content(self, file_content, **keywords): - self.export_tables(file_content, **keywords) - - def export_tables(self, exporter, **keywords): - """ read database tables """ - raise NotImplementedError("Please implement this method") class DjangoModelExportAdapter(object): - """ django export parameter holder """ + """django export parameter holder""" def __init__(self, model, export_columns=None): self.model = model @@ -36,19 +18,19 @@ class DjangoModelExportAdapter(object): @property def name(self): - """ get database table name """ + """get database table name""" return self.get_name() def get_name(self): - """ get database table name """ + """get database table name""" return self.model._meta.model_name class DjangoModelImportAdapter(DjangoModelExportAdapter): - """ parameter holder for django data import """ + """parameter holder for django data import""" class InOutParameter(object): - """ local class to manipulate variable io """ + """local class to manipulate variable io""" def __init__(self): self.output = None @@ -56,91 +38,96 @@ class DjangoModelImportAdapter(DjangoModelExportAdapter): def __init__(self, model): DjangoModelExportAdapter.__init__(self, model) - self.__column_names = self.InOutParameter() - self.__column_name_mapping_dict = self.InOutParameter() - self.__row_initializer = self.InOutParameter() + self._column_names = self.InOutParameter() + self._column_name_mapping_dict = self.InOutParameter() + self._row_initializer = self.InOutParameter() self._process_parameters() @property def row_initializer(self): - """ contructor for a database table 
entry """ - return self.__row_initializer.output + """contructor for a database table entry""" + return self._row_initializer.output @property def column_names(self): - """ the desginated database column names """ - return self.__column_names.output + """the desginated database column names""" + return self._column_names.output @property def column_name_mapping_dict(self): - """ if not the same, a mapping dictionary is looked up""" - return self.__column_name_mapping_dict.output + """if not the same, a mapping dictionary is looked up""" + return self._column_name_mapping_dict.output @row_initializer.setter def row_initializer(self, a_function): - """ set the contructor """ - self.__row_initializer.input = a_function + """set the contructor""" + self._row_initializer.input = a_function self._process_parameters() @column_names.setter def column_names(self, column_names): - """ set the column names """ - self.__column_names.input = column_names + """set the column names""" + self._column_names.input = column_names self._process_parameters() @column_name_mapping_dict.setter def column_name_mapping_dict(self, mapping_dict): - """ set the mapping dict """ - self.__column_name_mapping_dict.input = mapping_dict + """set the mapping dict""" + self._column_name_mapping_dict.input = mapping_dict self._process_parameters() def _process_parameters(self): - if self.__row_initializer.input is None: - self.__row_initializer.output = None + if self._row_initializer.input is None: + self._row_initializer.output = None else: - self.__row_initializer.output = self.__row_initializer.input - if isinstance(self.__column_name_mapping_dict.input, list): - self.__column_names.output = self.__column_name_mapping_dict.input - self.__column_name_mapping_dict.output = None - elif isinstance(self.__column_name_mapping_dict.input, dict): - if self.__column_names.input: - self.__column_names.output = [ - self.__column_name_mapping_dict.input[name] - for name in self.__column_names.input - ] - 
self.__column_name_mapping_dict.output = None - if self.__column_names.output is None: - self.__column_names.output = self.__column_names.input + self._row_initializer.output = self._row_initializer.input + if isinstance(self._column_name_mapping_dict.input, list): + self._column_names.output = self._column_name_mapping_dict.input + self._column_name_mapping_dict.output = None + elif isinstance(self._column_name_mapping_dict.input, dict): + + if self._column_names.input: + self._column_names.output = [] + indices = [] + for index, name in enumerate(self._column_names.input): + if name in self._column_name_mapping_dict.input: + self._column_names.output.append( + self._column_name_mapping_dict.input[name] + ) + indices.append(index) + self._column_name_mapping_dict.output = indices + if self._column_names.output is None: + self._column_names.output = self._column_names.input class DjangoModelExporter(object): - """ public interface for django model export """ + """public interface for django model export""" def __init__(self): self.adapters = [] def append(self, import_adapter): - """ store model parameter for more than one model """ + """store model parameter for more than one model""" self.adapters.append(import_adapter) class DjangoModelImporter(object): - """ public interface for django model import """ + """public interface for django model import""" def __init__(self): - self.__adapters = {} + self._adapters = {} def append(self, import_adapter): - """ store model parameter for more than one model """ - self.__adapters[import_adapter.get_name()] = import_adapter + """store model parameter for more than one model""" + self._adapters[import_adapter.get_name()] = import_adapter def get(self, name): - """ get a parameter out """ - return self.__adapters.get(name, None) + """get a parameter out""" + return self._adapters.get(name, None) class SQLTableExportAdapter(DjangoModelExportAdapter): - """ parameter holder for sql table data export """ + """parameter holder 
for sql table data export""" def __init__(self, model, export_columns=None): DjangoModelExportAdapter.__init__(self, model, export_columns) @@ -151,7 +138,7 @@ class SQLTableExportAdapter(DjangoModelExportAdapter): class SQLTableImportAdapter(DjangoModelImportAdapter): - """ parameter holder for sqlalchemy table import """ + """parameter holder for sqlalchemy table import""" def __init__(self, model): DjangoModelImportAdapter.__init__(self, model) @@ -162,7 +149,7 @@ class SQLTableImportAdapter(DjangoModelImportAdapter): class SQLTableExporter(DjangoModelExporter): - """ public interface for sql table export """ + """public interface for sql table export""" def __init__(self, session): DjangoModelExporter.__init__(self) @@ -170,7 +157,7 @@ class SQLTableExporter(DjangoModelExporter): class SQLTableImporter(DjangoModelImporter): - """ public interface to do data import via sqlalchemy """ + """public interface to do data import via sqlalchemy""" def __init__(self, session): DjangoModelImporter.__init__(self) diff --git a/pyexcel_io/database/exporters/django.py b/pyexcel_io/database/exporters/django.py index 933e587..1953ace 100644 --- a/pyexcel_io/database/exporters/django.py +++ b/pyexcel_io/database/exporters/django.py @@ -4,16 +4,15 @@ The lower level handler for django import and export - :copyright: (c) 2014-2017 by Onni Software Ltd. + :copyright: (c) 2014-2022 by Onni Software Ltd. 
:license: New BSD License, see LICENSE for more details """ -from pyexcel_io.database.common import DbExporter +from pyexcel_io.plugin_api import IReader from pyexcel_io.database.querysets import QuerysetsReader class DjangoModelReader(QuerysetsReader): - """Read from django model - """ + """Read from django model""" def __init__(self, model, export_columns=None, **keywords): self.__model = model @@ -28,22 +27,20 @@ class DjangoModelReader(QuerysetsReader): ) -class DjangoBookReader(DbExporter): - """ read django models """ +class DjangoBookReader(IReader): + """read django models""" - def __init__(self): - DbExporter.__init__(self) - self.exporter = None + def __init__(self, exporter, _, **keywords): + self.exporter = exporter + self.keywords = keywords + self.content_array = self.exporter.adapters - def export_tables(self, file_content, **keywords): - self.exporter = file_content - self._load_from_django_models() - - def read_sheet(self, native_sheet): + def read_sheet(self, native_sheet_index): + native_sheet = self.content_array[native_sheet_index] reader = DjangoModelReader( - native_sheet.model, native_sheet.export_columns + native_sheet.model, export_columns=native_sheet.export_columns ) - return reader.to_array() + return reader - def _load_from_django_models(self): - self._native_book = self.exporter.adapters + def close(self): + pass diff --git a/pyexcel_io/database/exporters/queryset.py b/pyexcel_io/database/exporters/queryset.py new file mode 100644 index 0000000..6cca0e7 --- /dev/null +++ b/pyexcel_io/database/exporters/queryset.py @@ -0,0 +1,20 @@ +from pyexcel_io.plugin_api import IReader +from pyexcel_io.database.querysets import QuerysetsReader + + +class QueryReader(IReader): + def __init__(self, query_sets, _, column_names=None, **keywords): + self.query_sets = query_sets + self.column_names = column_names + self.keywords = keywords + self.content_array = [ + QuerysetsReader( + self.query_sets, self.column_names, **self.keywords + ) + ] + + def 
read_sheet(self, index): + return self.content_array[index] + + def close(self): + pass diff --git a/pyexcel_io/database/exporters/sqlalchemy.py b/pyexcel_io/database/exporters/sqlalchemy.py index dc32ef8..a1db305 100644 --- a/pyexcel_io/database/exporters/sqlalchemy.py +++ b/pyexcel_io/database/exporters/sqlalchemy.py @@ -4,16 +4,15 @@ The lower level handler for database import and export - :copyright: (c) 2014-2017 by Onni Software Ltd. + :copyright: (c) 2014-2022 by Onni Software Ltd. :license: New BSD License, see LICENSE for more details """ -from pyexcel_io.database.common import DbExporter +from pyexcel_io.plugin_api import IReader from pyexcel_io.database.querysets import QuerysetsReader class SQLTableReader(QuerysetsReader): - """Read a table - """ + """Read a table""" def __init__(self, session, table, export_columns=None, **keywords): everything = session.query(table).all() @@ -32,24 +31,22 @@ class SQLTableReader(QuerysetsReader): QuerysetsReader.__init__(self, everything, column_names, **keywords) -class SQLBookReader(DbExporter): - """ read a table via sqlalchemy """ +class SQLBookReader(IReader): + """read a table via sqlalchemy""" - def __init__(self): - DbExporter.__init__(self) - self.__exporter = None + def __init__(self, exporter, _, **keywords): + self.__exporter = exporter + self.content_array = self.__exporter.adapters + self.keywords = keywords - def export_tables(self, file_content, **keywords): - self.__exporter = file_content - self._load_from_tables() - - def read_sheet(self, native_sheet): + def read_sheet(self, native_sheet_index): + native_sheet = self.content_array[native_sheet_index] reader = SQLTableReader( self.__exporter.session, native_sheet.table, native_sheet.export_columns, ) - return reader.to_array() + return reader - def _load_from_tables(self): - self._native_book = self.__exporter.adapters + def close(self): + pass diff --git a/pyexcel_io/database/importers/django.py b/pyexcel_io/database/importers/django.py index 
8935d1b..42f464f 100644 --- a/pyexcel_io/database/importers/django.py +++ b/pyexcel_io/database/importers/django.py @@ -4,77 +4,75 @@ The lower level handler for django import and export - :copyright: (c) 2014-2017 by Onni Software Ltd. + :copyright: (c) 2014-2022 by Onni Software Ltd. :license: New BSD License, see LICENSE for more details """ import logging -from pyexcel_io.book import BookWriter -from pyexcel_io.sheet import SheetWriter -from pyexcel_io.utils import is_empty_array, swap_empty_string_for_none import pyexcel_io.constants as constants +from pyexcel_io.utils import is_empty_array, swap_empty_string_for_none +from pyexcel_io.plugin_api import IWriter, ISheetWriter log = logging.getLogger(__name__) -class DjangoModelWriter(SheetWriter): - """ import data into a django model """ +class DjangoModelWriter(ISheetWriter): + """import data into a django model""" def __init__(self, importer, adapter, batch_size=None, bulk_save=True): - SheetWriter.__init__(self, importer, adapter, adapter.name) - self.__batch_size = batch_size - self.__model = adapter.model - self.__column_names = adapter.column_names - self.__mapdict = adapter.column_name_mapping_dict - self.__initializer = adapter.row_initializer - self.__objs = [] - self.__bulk_save = bulk_save + self.batch_size = batch_size + self.model = adapter.model + self.column_names = adapter.column_names + self.mapdict = adapter.column_name_mapping_dict + self.initializer = adapter.row_initializer + self.objs = [] + self.bulk_save = bulk_save + self.adapter = adapter def write_row(self, array): if is_empty_array(array): - print(constants.MESSAGE_EMPTY_ARRAY) + log.warning(constants.MESSAGE_EMPTY_ARRAY) else: new_array = swap_empty_string_for_none(array) + if self.mapdict: + another_new_array = [] + for index, element in enumerate(new_array): + if index in self.mapdict: + another_new_array.append(element) + new_array = another_new_array model_to_be_created = new_array - if self.__initializer is not None: - 
model_to_be_created = self.__initializer(new_array) + if self.initializer is not None: + model_to_be_created = self.initializer(new_array) if model_to_be_created: - self.__objs.append( - self.__model( - **dict(zip(self.__column_names, model_to_be_created)) - ) - ) + row = dict(zip(self.column_names, model_to_be_created)) + self.objs.append(self.model(**row)) # else # skip the row def close(self): - if self.__bulk_save: - self.__model.objects.bulk_create( - self.__objs, batch_size=self.__batch_size + if self.bulk_save: + self.model.objects.bulk_create( + self.objs, batch_size=self.batch_size ) else: - for an_object in self.__objs: + for an_object in self.objs: an_object.save() -class DjangoBookWriter(BookWriter): - """ write data into django models """ +class DjangoBookWriter(IWriter): + """write data into django models""" - def __init__(self): - BookWriter.__init__(self) - self.__importer = None - - def open_content(self, file_content, **keywords): - self.__importer = file_content + def __init__(self, exporter, _, **keywords): + self.importer = exporter self._keywords = keywords def create_sheet(self, sheet_name): sheet_writer = None - model = self.__importer.get(sheet_name) + model = self.importer.get(sheet_name) if model: sheet_writer = DjangoModelWriter( - self.__importer, + self.importer, model, batch_size=self._keywords.get("batch_size", None), bulk_save=self._keywords.get("bulk_save", True), @@ -86,3 +84,6 @@ class DjangoBookWriter(BookWriter): ) return sheet_writer + + def close(self): + pass diff --git a/pyexcel_io/database/importers/sqlalchemy.py b/pyexcel_io/database/importers/sqlalchemy.py index 0f8a079..db10dc5 100644 --- a/pyexcel_io/database/importers/sqlalchemy.py +++ b/pyexcel_io/database/importers/sqlalchemy.py @@ -4,13 +4,16 @@ The lower level handler for database import and export - :copyright: (c) 2014-2017 by Onni Software Ltd. + :copyright: (c) 2014-2022 by Onni Software Ltd. 
:license: New BSD License, see LICENSE for more details """ -from pyexcel_io.book import BookWriter -from pyexcel_io.sheet import SheetWriter -from pyexcel_io.utils import is_empty_array, swap_empty_string_for_none +import logging + import pyexcel_io.constants as constants +from pyexcel_io.utils import is_empty_array, swap_empty_string_for_none +from pyexcel_io.plugin_api import IWriter, ISheetWriter + +LOG = logging.getLogger(__name__) class PyexcelSQLSkipRowException(Exception): @@ -22,66 +25,62 @@ class PyexcelSQLSkipRowException(Exception): pass -class SQLTableWriter(SheetWriter): - """Write to a table - """ +class SQLTableWriter(ISheetWriter): + """Write to a table""" def __init__( self, importer, adapter, auto_commit=True, bulk_size=1000, **keywords ): - SheetWriter.__init__( - self, importer, adapter, adapter.get_name(), **keywords - ) self.__auto_commit = auto_commit self.__count = 0 self.__bulk_size = bulk_size + self.adapter = adapter + self.importer = importer def write_row(self, array): if is_empty_array(array): - print(constants.MESSAGE_EMPTY_ARRAY) + LOG.warning(constants.MESSAGE_EMPTY_ARRAY) else: new_array = swap_empty_string_for_none(array) try: self._write_row(new_array) except PyexcelSQLSkipRowException: - print(constants.MESSAGE_IGNORE_ROW) - print(new_array) + LOG.info(constants.MESSAGE_IGNORE_ROW) + LOG.info(new_array) def _write_row(self, array): - row = dict(zip(self._native_sheet.column_names, array)) + new_array = array + if self.adapter.column_name_mapping_dict: + another_new_array = [] + for index, element in enumerate(new_array): + if index in self.adapter.column_name_mapping_dict: + another_new_array.append(element) + new_array = another_new_array + row = dict(zip(self.adapter.column_names, new_array)) obj = None - if self._native_sheet.row_initializer: + if self.adapter.row_initializer: # allow initinalizer to return None # if skipping is needed - obj = self._native_sheet.row_initializer(row) + obj = self.adapter.row_initializer(row) 
if obj is None: - obj = self._native_sheet.table() - for name in self._native_sheet.column_names: - if self._native_sheet.column_name_mapping_dict is not None: - key = self._native_sheet.column_name_mapping_dict[name] - else: - key = name - setattr(obj, key, row[name]) - self._native_book.session.add(obj) + obj = self.adapter.table() + for name in self.adapter.column_names: + setattr(obj, name, row[name]) + self.importer.session.add(obj) if self.__auto_commit and self.__bulk_size != float("inf"): self.__count += 1 if self.__count % self.__bulk_size == 0: - self._native_book.session.commit() + self.importer.session.commit() def close(self): if self.__auto_commit: - self._native_book.session.commit() + self.importer.session.commit() -class SQLBookWriter(BookWriter): - """ write data into database tables via sqlalchemy """ +class SQLBookWriter(IWriter): + """write data into database tables via sqlalchemy""" - def __init__(self): - BookWriter.__init__(self) - self.__importer = None - self.__auto_commit = True - - def open_content(self, file_content, auto_commit=True, **keywords): + def __init__(self, file_content, _, auto_commit=True, **keywords): self.__importer = file_content self.__auto_commit = auto_commit @@ -99,3 +98,6 @@ class SQLBookWriter(BookWriter): ) return sheet_writer + + def close(self): + pass diff --git a/pyexcel_io/database/querysets.py b/pyexcel_io/database/querysets.py index b062567..3ce08da 100644 --- a/pyexcel_io/database/querysets.py +++ b/pyexcel_io/database/querysets.py @@ -4,23 +4,27 @@ The lower level handler for querysets - :copyright: (c) 2014-2017 by Onni Software Ltd. + :copyright: (c) 2014-2022 by Onni Software Ltd. 
:license: New BSD License, see LICENSE for more details """ import datetime from itertools import chain -from pyexcel_io.sheet import SheetReader +from pyexcel_io.constants import DEFAULT_SHEET_NAME +from pyexcel_io.plugin_api.abstract_sheet import ISheet -class QuerysetsReader(SheetReader): - """ turn querysets into an array """ +class QuerysetsReader(ISheet): + """turn querysets into an array""" - def __init__(self, query_sets, column_names, **keywords): - SheetReader.__init__(self, query_sets, **keywords) + def __init__(self, query_sets, column_names): + self.name = DEFAULT_SHEET_NAME self.__column_names = column_names self.__query_sets = query_sets + def row_iterator(self): + return chain([self.__column_names], self.__query_sets) + def to_array(self): """ Convert query sets into an array @@ -28,11 +32,11 @@ class QuerysetsReader(SheetReader): if len(self.__query_sets) == 0: yield [] - for element in SheetReader.to_array(self): - yield element - - def row_iterator(self): - return chain([self.__column_names], self.__query_sets) + for row in self.row_iterator(): + row_values = [] + for value in self.column_iterator(row): + row_values.append(value) + yield row_values def column_iterator(self, row): if self.__column_names is None: @@ -52,7 +56,7 @@ class QuerysetsReader(SheetReader): def get_complex_attribute(row, attribute): - """ recursively get an attribute """ + """recursively get an attribute""" attributes = attribute.split("__") value = row try: @@ -64,7 +68,7 @@ def get_complex_attribute(row, attribute): def get_simple_attribute(row, attribute): - """ get dotted attribute """ + """get dotted attribute""" value = getattr(row, attribute) if isinstance(value, (datetime.date, datetime.time)): value = value.isoformat() diff --git a/pyexcel_io/exceptions.py b/pyexcel_io/exceptions.py index a971cb0..2c8ed9f 100644 --- a/pyexcel_io/exceptions.py +++ b/pyexcel_io/exceptions.py @@ -4,7 +4,7 @@ all possible exceptions - :copyright: (c) 2014-2017 by Onni Software Ltd. 
+ :copyright: (c) 2014-2022 by Onni Software Ltd. :license: New BSD License, see LICENSE for more details """ @@ -21,12 +21,6 @@ class SupportingPluginAvailableButNotInstalled(Exception): pass -class UpgradePlugin(Exception): - """raised when a known plugin is not compatible""" - - pass - - class IntegerAccuracyLossError(Exception): """ When an interger is greater than 999999999999999, ods loses its accuracy. @@ -46,11 +40,13 @@ class IntegerAccuracyLossError(Exception): b=get_sheet(file_name='abc.ods') b[0,0] != s[0,0] """ + def __init__(self, message): custom_message = ( - message + '\n' + - "In order to keep its accuracy, please save as string. Then " + - "convert to int, long or float after the value will be read back" + message + + "\n" + + "In order to keep its accuracy, please save as string. Then " + + "convert to int, long or float after the value will be read back" ) super(IntegerAccuracyLossError, self).__init__(custom_message) diff --git a/pyexcel_io/io.py b/pyexcel_io/io.py index 4dd0e76..8111959 100644 --- a/pyexcel_io/io.py +++ b/pyexcel_io/io.py @@ -4,20 +4,60 @@ The io interface to file extensions - :copyright: (c) 2014-2017 by Onni Software Ltd. + :copyright: (c) 2014-2022 by Onni Software Ltd. :license: New BSD License, see LICENSE for more details """ -from types import GeneratorType +import os import warnings +from types import GeneratorType -from pyexcel_io._compact import isstream, PY2 -from pyexcel_io.plugins import READERS, WRITERS -import pyexcel_io.constants as constants +from pyexcel_io import constants +from pyexcel_io.reader import Reader +from pyexcel_io.writer import Writer +from pyexcel_io.plugins import OLD_READERS, OLD_WRITERS +from pyexcel_io._compact import isstream +from pyexcel_io.exceptions import ( + NoSupportingPluginFound, + SupportingPluginAvailableButNotInstalled, +) def iget_data(afile, file_type=None, **keywords): """Get data from an excel file source + The data has not gone into memory yet. 
If you use dedicated partial read + plugins, such as pyexcel-xlsxr, pyexcel-odsr, you will notice + the memory consumption drop when you work with big files. + + :param afile: a file name, a file stream or actual content + :param sheet_name: the name of the sheet to be loaded + :param sheet_index: the index of the sheet to be loaded + :param sheets: a list of sheet to be loaded + :param file_type: used only when filename is not a physical file name + :param force_file_type: used only when filename refers to a physical file + and it is intended to open it as forced file type. + :param library: explicitly name a library for use. + e.g. library='pyexcel-ods' + :param auto_detect_float: defaults to True + :param auto_detect_int: defaults to True + :param auto_detect_datetime: defaults to True + :param ignore_infinity: defaults to True + :param ignore_nan_text: various forms of 'NaN', 'nan' are ignored + :param default_float_nan: choose one form of 'NaN', 'nan' + :param pep_0515_off: turn off pep 0515. default to True. + :param keep_trailing_empty_cells: keep trailing columns. default to False + :param keywords: any other library specific parameters + :returns: an ordered dictionary + """ + data, reader = _get_data( + afile, file_type=file_type, streaming=True, **keywords + ) + return data, reader + + +def get_data(afile, file_type=None, streaming=None, **keywords): + """Get data from an excel file source + :param afile: a file name, a file stream or actual content :param sheet_name: the name of the sheet to be loaded :param sheet_index: the index of the sheet to be loaded @@ -37,31 +77,7 @@ def iget_data(afile, file_type=None, **keywords): :param ignore_nan_text: various forms of 'NaN', 'nan' are ignored :param default_float_nan: choose one form of 'NaN', 'nan' :param pep_0515_off: turn off pep 0515. default to True. 
- :param keywords: any other library specific parameters - :returns: an ordered dictionary - """ - data, reader = _get_data( - afile, file_type=file_type, streaming=True, **keywords - ) - return data, reader - - -def get_data(afile, file_type=None, streaming=None, **keywords): - """Get data from an excel file source - - :param afile: a file name, a file stream or actual content - :param sheet_name: the name of the sheet to be loaded - :param sheet_index: the index of the sheet to be loaded - :param file_type: used only when filename is not a physial file name - :param streaming: toggles the type of returned data. The values of the - returned dictionary remain as generator if it is set - to True. Default is False. - :param library: explicitly name a library for use. - e.g. library='pyexcel-ods' - :param auto_detect_float: defaults to True - :param auto_detect_int: defaults to True - :param auto_detect_datetime: defaults to True - :param ignore_infinity: defaults to True + :param keep_trailing_empty_cells: keep trailing columns. default to False :param keywords: any other library specific parameters :returns: an ordered dictionary """ @@ -97,6 +113,8 @@ def save_data(afile, data, file_type=None, **keywords): :param filename: actual file name, a file stream or actual content :param data: a dictionary but an ordered dictionary is preferred :param file_type: used only when filename is not a physial file name + :param force_file_type: used only when filename refers to a physical file + and it is intended to open it as forced file type. :param library: explicitly name a library for use. e.g. 
library='pyexcel-ods' :param keywords: any other parameters that python csv module's @@ -109,39 +127,20 @@ def save_data(afile, data, file_type=None, **keywords): single_sheet_in_book = True to_store = {constants.DEFAULT_SHEET_NAME: data} else: - if PY2: - keys = data.keys() - else: - keys = list(data.keys()) + keys = list(data.keys()) single_sheet_in_book = len(keys) == 1 no_file_type = isstream(afile) and file_type is None if no_file_type: file_type = constants.FILE_FORMAT_CSV - store_data( - afile, - to_store, - file_type=file_type, - single_sheet_in_book=single_sheet_in_book, - **keywords - ) - - -def store_data(afile, data, file_type=None, **keywords): - """Non public function to store data to afile - - :param filename: actual file name, a file stream or actual content - :param data: the data to be written - :param file_type: used only when filename is not a physial file name - :param keywords: any other parameters - """ if isstream(afile): keywords.update(dict(file_stream=afile, file_type=file_type)) else: keywords.update(dict(file_name=afile, file_type=file_type)) + keywords["single_sheet_in_book"] = single_sheet_in_book with get_writer(**keywords) as writer: - writer.write(data) + writer.write(to_store) def load_data( @@ -180,34 +179,61 @@ def load_data( try: file_type = file_name.split(".")[-1] except AttributeError: - raise Exception("file_name should be a string type") + raise Exception(constants.MESSAGE_FILE_NAME_SHOULD_BE_STRING) - reader = READERS.get_a_plugin(file_type, library) - if file_name: - reader.open(file_name, **keywords) - elif file_content: - reader.open_content(file_content, **keywords) - elif file_stream: - reader.open_stream(file_stream, **keywords) - if sheet_name: - result = reader.read_sheet_by_name(sheet_name) - elif sheet_index is not None: - result = reader.read_sheet_by_index(sheet_index) - elif sheets is not None: - result = reader.read_many(sheets) - else: - result = reader.read_all() - if streaming is False: - for key in 
result.keys(): - result[key] = list(result[key]) - reader.close() - reader = None + try: + reader = OLD_READERS.get_a_plugin(file_type, library) + except (NoSupportingPluginFound, SupportingPluginAvailableButNotInstalled): + reader = Reader(file_type, library) - return result, reader + try: + if file_name: + reader.open(file_name, **keywords) + elif file_content: + reader.open_content(file_content, **keywords) + elif file_stream: + reader.open_stream(file_stream, **keywords) + else: + raise IOError("Unrecognized options") + if sheet_name: + result = reader.read_sheet_by_name(sheet_name) + elif sheet_index is not None: + result = reader.read_sheet_by_index(sheet_index) + elif sheets is not None: + result = reader.read_many(sheets) + else: + result = reader.read_all() + if streaming is False: + for key in result.keys(): + result[key] = list(result[key]) + reader.close() + reader = None + + return result, reader + except NoSupportingPluginFound: + if file_name: + if os.path.exists(file_name): + if os.path.isfile(file_name): + raise + else: + raise IOError( + constants.MESSAGE_NOT_FILE_FORMATTER % file_name + ) + else: + raise IOError( + constants.MESSAGE_FILE_DOES_NOT_EXIST % file_name + ) + else: + raise def get_writer( - file_name=None, file_stream=None, file_type=None, library=None, **keywords + file_name=None, + file_stream=None, + file_type=None, + library=None, + force_file_type=None, + **keywords ): """find a suitable writer""" inputs = [file_name, file_stream] @@ -217,15 +243,23 @@ def get_writer( raise IOError(constants.MESSAGE_ERROR_02) file_type_given = True + if file_type is None and file_name: - try: - file_type = file_name.split(".")[-1] - except AttributeError: - raise Exception("file_name should be a string type") + if force_file_type: + file_type = force_file_type + else: + try: + file_type = file_name.split(".")[-1] + except AttributeError: + raise Exception(constants.MESSAGE_FILE_NAME_SHOULD_BE_STRING) file_type_given = False - writer = 
WRITERS.get_a_plugin(file_type, library) + try: + writer = OLD_WRITERS.get_a_plugin(file_type, library) + except (NoSupportingPluginFound, SupportingPluginAvailableButNotInstalled): + writer = Writer(file_type, library) + if file_name: if file_type_given: writer.open_content(file_name, **keywords) @@ -235,3 +269,7 @@ def get_writer( writer.open_stream(file_stream, **keywords) # else: is resolved by earlier raise statement return writer + + +# backward compactibility +store_data = save_data diff --git a/pyexcel_io/manager.py b/pyexcel_io/manager.py index 03c4586..9e15090 100644 --- a/pyexcel_io/manager.py +++ b/pyexcel_io/manager.py @@ -4,11 +4,10 @@ Control file streams - :copyright: (c) 2014-2017 by Onni Software Ltd. + :copyright: (c) 2014-2022 by Onni Software Ltd. :license: New BSD License, see LICENSE for more details """ -from pyexcel_io._compact import StringIO, BytesIO - +from pyexcel_io._compact import BytesIO, StringIO MIME_TYPES = {} FILE_TYPES = () diff --git a/pyexcel_io/plugin_api/__init__.py b/pyexcel_io/plugin_api/__init__.py new file mode 100644 index 0000000..4d2d771 --- /dev/null +++ b/pyexcel_io/plugin_api/__init__.py @@ -0,0 +1,3 @@ +from .abstract_sheet import ISheet, ISheetWriter, NamedContent # noqa: F401 +from .abstract_reader import IReader # noqa: F401 +from .abstract_writer import IWriter # noqa: F401 diff --git a/pyexcel_io/plugin_api/abstract_reader.py b/pyexcel_io/plugin_api/abstract_reader.py new file mode 100644 index 0000000..8ee4b4b --- /dev/null +++ b/pyexcel_io/plugin_api/abstract_reader.py @@ -0,0 +1,21 @@ +from .abstract_sheet import ISheet + + +class IReader(object): + """ + content_array should be a list of NamedContent + where: name is the sheet name, + payload is the native sheet. 
+ """ + + def read_sheet(self, sheet_index) -> ISheet: + raise NotImplementedError("Read the sheet by index") + + def sheet_names(self): + return [content.name for content in self.content_array] + + def __len__(self): + return len(self.content_array) + + def close(self): + raise NotImplementedError("Close the file") diff --git a/pyexcel_io/plugin_api/abstract_sheet.py b/pyexcel_io/plugin_api/abstract_sheet.py new file mode 100644 index 0000000..ff81b76 --- /dev/null +++ b/pyexcel_io/plugin_api/abstract_sheet.py @@ -0,0 +1,31 @@ +class ISheet(object): + def row_iterator(self): + raise NotImplementedError("iterate each row") + + def column_iterator(self, row): + raise NotImplementedError("iterate each column at a given row") + + +class ISheetWriter(object): + def write_row(self, data_row): + raise NotImplementedError("How does your sheet write a row of data") + + def write_array(self, table): + """ + For standalone usage, write an array + """ + for row in table: + self.write_row(row) + + def close(self): + raise NotImplementedError("How would you close your file") + + +class NamedContent(object): + """ + Helper class for content that does not have a name + """ + + def __init__(self, name, payload): + self.name = name + self.payload = payload diff --git a/pyexcel_io/plugin_api/abstract_writer.py b/pyexcel_io/plugin_api/abstract_writer.py new file mode 100644 index 0000000..0f0c8f5 --- /dev/null +++ b/pyexcel_io/plugin_api/abstract_writer.py @@ -0,0 +1,15 @@ +from .abstract_sheet import ISheetWriter + + +class IWriter(object): + def create_sheet(self, sheet_name) -> ISheetWriter: + raise NotImplementedError("Please implement a native sheet writer") + + def write(self, incoming_dict): + for sheet_name in incoming_dict: + sheet_writer = self.create_sheet(sheet_name) + if sheet_writer: + sheet_writer.write_array(incoming_dict[sheet_name]) + sheet_writer.close() + else: + raise Exception("Cannot create a sheet writer!") diff --git a/pyexcel_io/plugins.py 
b/pyexcel_io/plugins.py index 016e0a2..495283a 100644 --- a/pyexcel_io/plugins.py +++ b/pyexcel_io/plugins.py @@ -4,24 +4,23 @@ factory for getting readers and writers - :copyright: (c) 2014-2017 by Onni Software Ltd. + :copyright: (c) 2014-2022 by Onni Software Ltd. :license: New BSD License, see LICENSE for more details """ -from lml.loader import scan_plugins_regex -from lml.plugin import PluginManager -from lml.plugin import PluginInfoChain, PluginInfo - import pyexcel_io.utils as ioutils import pyexcel_io.manager as manager -import pyexcel_io.exceptions as exceptions import pyexcel_io.constants as constants - +import pyexcel_io.exceptions as exceptions +from lml.loader import scan_plugins_regex +from lml.plugin import PluginInfo, PluginManager, PluginInfoChain ERROR_MESSAGE_FORMATTER = "one of these plugins for %s data in '%s': %s" UPGRADE_MESSAGE = "Please upgrade the plugin '%s' according to \ plugin compactibility table." READER_PLUGIN = "pyexcel-io reader" +READER_PLUGIN_V2 = "pyexcel-io v2 reader" WRITER_PLUGIN = "pyexcel-io writer" +WRITER_PLUGIN_V2 = "pyexcel-io v2 writer" class IOPluginInfo(PluginInfo): @@ -33,7 +32,7 @@ class IOPluginInfo(PluginInfo): class IOPluginInfoChain(PluginInfoChain): - """provide custom functions to add a reader and a writer """ + """provide custom functions to add a reader and a writer""" def add_a_reader( self, @@ -41,7 +40,7 @@ class IOPluginInfoChain(PluginInfoChain): file_types=None, stream_type=None, ): - """ add pyexcle-io reader plugin info """ + """add pyexcle-io reader plugin info""" a_plugin_info = IOPluginInfo( READER_PLUGIN, self._get_abs_path(relative_plugin_class_path), @@ -56,7 +55,7 @@ class IOPluginInfoChain(PluginInfoChain): file_types=None, stream_type=None, ): - """ add pyexcle-io writer plugin info """ + """add pyexcle-io writer plugin info""" a_plugin_info = IOPluginInfo( WRITER_PLUGIN, self._get_abs_path(relative_plugin_class_path), @@ -66,6 +65,50 @@ class IOPluginInfoChain(PluginInfoChain): return 
self.add_a_plugin_instance(a_plugin_info) +class IOPluginInfoChainV2(PluginInfoChain): + """provide custom functions to add a reader and a writer""" + + def add_a_reader( + self, + relative_plugin_class_path=None, + locations=(), + file_types=None, + stream_type=None, + ): + """add pyexcle-io reader plugin info""" + a_plugin_info = IOPluginInfo( + READER_PLUGIN_V2, + self._get_abs_path(relative_plugin_class_path), + file_types=[ + f"{location}-{file_type}" + for file_type in file_types + for location in locations + ], + stream_type=stream_type, + ) + return self.add_a_plugin_instance(a_plugin_info) + + def add_a_writer( + self, + relative_plugin_class_path=None, + locations=(), + file_types=(), + stream_type=None, + ): + """add pyexcle-io writer plugin info""" + a_plugin_info = IOPluginInfo( + WRITER_PLUGIN_V2, + self._get_abs_path(relative_plugin_class_path), + file_types=[ + f"{location}-{file_type}" + for file_type in file_types + for location in locations + ], + stream_type=stream_type, + ) + return self.add_a_plugin_instance(a_plugin_info) + + class IOManager(PluginManager): """Manage pyexcel-io plugins""" @@ -81,13 +124,16 @@ class IOManager(PluginManager): _do_additional_registration(plugin_info) def register_a_plugin(self, cls, plugin_info): - """ for dynamically loaded plugin """ + """for dynamically loaded plugin""" PluginManager.register_a_plugin(self, cls, plugin_info) _do_additional_registration(plugin_info) def get_a_plugin(self, file_type=None, library=None, **keywords): __file_type = file_type.lower() - plugin = self.load_me_now(__file_type, library=library) + try: + plugin = self.load_me_now(__file_type, library=library) + except Exception: + self.raise_exception(__file_type) handler = plugin() handler.set_type(__file_type) return handler @@ -112,7 +158,7 @@ class IOManager(PluginManager): ) def get_all_formats(self): - """ return all supported formats """ + """return all supported formats""" all_formats = set( list(self.registry.keys()) + 
list(self.known_plugins.keys()) ) @@ -122,14 +168,88 @@ class IOManager(PluginManager): return all_formats +class NewIOManager(IOManager): + def load_me_later(self, plugin_info): + PluginManager.load_me_later(self, plugin_info) + _do_additional_registration_for_new_plugins(plugin_info) + + def register_a_plugin(self, cls, plugin_info): + """for dynamically loaded plugin""" + PluginManager.register_a_plugin(self, cls, plugin_info) + _do_additional_registration_for_new_plugins(plugin_info) + + def get_a_plugin( + self, file_type=None, location=None, library=None, **keywords + ): + __file_type = file_type.lower() + plugin = self.load_me_now(f"{location}-{__file_type}", library=library) + return plugin + + def raise_exception(self, file_type): + file_type = file_type.split("-")[1] + plugins = self.known_plugins.get(file_type, None) + if plugins: + message = "Please install " + if len(plugins) > 1: + message += ERROR_MESSAGE_FORMATTER % ( + self.action, + file_type, + ",".join(plugins), + ) + else: + message += plugins[0] + raise exceptions.SupportingPluginAvailableButNotInstalled(message) + + else: + raise exceptions.NoSupportingPluginFound( + "No suitable library found for %s" % file_type + ) + + def get_all_formats(self): + """return all supported formats""" + all_formats = set( + [x.split("-")[1] for x in self.registry.keys()] + + list(self.known_plugins.keys()) + ) + return all_formats + + def _do_additional_registration(plugin_info): for file_type in plugin_info.tags(): manager.register_stream_type(file_type, plugin_info.stream_type) manager.register_a_file_type(file_type, plugin_info.stream_type, None) -READERS = IOManager(READER_PLUGIN, ioutils.AVAILABLE_READERS) -WRITERS = IOManager(WRITER_PLUGIN, ioutils.AVAILABLE_WRITERS) +def _do_additional_registration_for_new_plugins(plugin_info): + for file_type in plugin_info.tags(): + manager.register_stream_type( + file_type.split("-")[1], plugin_info.stream_type + ) + manager.register_a_file_type( + 
file_type.split("-")[1], plugin_info.stream_type, None + ) + + +class AllReaders: + def get_all_formats(self): + return OLD_READERS.get_all_formats().union( + NEW_READERS.get_all_formats() + ) - set([constants.DB_SQL, constants.DB_DJANGO]) + + +class AllWriters: + def get_all_formats(self): + return OLD_WRITERS.get_all_formats().union( + NEW_WRITERS.get_all_formats() + ) - set([constants.DB_SQL, constants.DB_DJANGO]) + + +OLD_READERS = IOManager(READER_PLUGIN, ioutils.AVAILABLE_READERS) +OLD_WRITERS = IOManager(WRITER_PLUGIN, ioutils.AVAILABLE_WRITERS) +NEW_WRITERS = NewIOManager(WRITER_PLUGIN_V2, ioutils.AVAILABLE_WRITERS) +NEW_READERS = NewIOManager(READER_PLUGIN_V2, ioutils.AVAILABLE_READERS) +READERS = AllReaders() +WRITERS = AllWriters() def load_plugins(plugin_name_patterns, path, black_list, white_list): @@ -138,5 +258,5 @@ def load_plugins(plugin_name_patterns, path, black_list, white_list): plugin_name_patterns=plugin_name_patterns, pyinstaller_path=path, black_list=black_list, - white_list=white_list + white_list=white_list, ) diff --git a/pyexcel_io/reader.py b/pyexcel_io/reader.py new file mode 100644 index 0000000..f0276fa --- /dev/null +++ b/pyexcel_io/reader.py @@ -0,0 +1,126 @@ +from pyexcel_io.sheet import SheetReader +from pyexcel_io.plugins import NEW_READERS +from pyexcel_io._compact import OrderedDict + + +def clean_keywords(keywords): + sheet_keywords = {} + native_sheet_keywords = {} + args_list = [ + "start_row", + "row_limit", + "start_column", + "column_limit", + "skip_column_func", + "skip_row_func", + "skip_empty_rows", + "row_renderer", + "keep_trailing_empty_cells", + ] + for arg in keywords: + if arg in args_list: + sheet_keywords[arg] = keywords[arg] + else: + native_sheet_keywords[arg] = keywords[arg] + return sheet_keywords, native_sheet_keywords + + +class Reader(object): + def __init__(self, file_type, library=None): + self.file_type = file_type + self.library = library + self.keywords = None + + # if you know which reader class 
to use, this attribute allows + # you to set reader class externally. Since there is no + # so call private field in Python, I am not going to create + # useless setter and getter functions like Java. + # in pyexcel, this attribute is mainly used for testing + self.reader_class = None + + def open(self, file_name, **keywords): + if self.reader_class is None: + self.reader_class = NEW_READERS.get_a_plugin( + self.file_type, location="file", library=self.library + ) + self.keywords, native_sheet_keywords = clean_keywords(keywords) + self.reader = self.reader_class( + file_name, self.file_type, **native_sheet_keywords + ) + return self.reader + + def open_content(self, file_content, **keywords): + self.keywords, native_sheet_keywords = clean_keywords(keywords) + if self.reader_class is None: + self.reader_class = NEW_READERS.get_a_plugin( + self.file_type, location="content", library=self.library + ) + self.reader = self.reader_class( + file_content, self.file_type, **native_sheet_keywords + ) + return self.reader + + def open_stream(self, file_stream, **keywords): + self.keywords, native_sheet_keywords = clean_keywords(keywords) + if self.reader_class is None: + self.reader_class = NEW_READERS.get_a_plugin( + self.file_type, location="memory", library=self.library + ) + self.reader = self.reader_class( + file_stream, self.file_type, **native_sheet_keywords + ) + return self.reader + + def read_sheet_by_name(self, sheet_name): + """ + read a named sheet from a excel data book + """ + sheet_names = self.reader.sheet_names() + index = sheet_names.index(sheet_name) + + return self.read_sheet_by_index(index) + + def read_sheet_by_index(self, sheet_index): + sheet_reader = self.reader.read_sheet(sheet_index) + sheet_names = self.reader.sheet_names() + sheet = EncapsulatedSheetReader(sheet_reader, **self.keywords) + return {sheet_names[sheet_index]: sheet.to_array()} + + def read_all(self): + """ + read everything from a excel data book + """ + result = OrderedDict() + for 
sheet_index in range(len(self.reader)): + content_dict = self.read_sheet_by_index(sheet_index) + result.update(content_dict) + return result + + def read_many(self, sheets): + """ + read everything from a excel data book + """ + result = OrderedDict() + for sheet in sheets: + if isinstance(sheet, int): + result.update(self.read_sheet_by_index(sheet)) + else: + result.update(self.read_sheet_by_name(sheet)) + return result + + def close(self): + return self.reader.close() + + def __enter__(self): + return self + + def __exit__(self, a_type, value, traceback): + self.close() + + +class EncapsulatedSheetReader(SheetReader): + def row_iterator(self): + yield from self._native_sheet.row_iterator() + + def column_iterator(self, row): + yield from self._native_sheet.column_iterator(row) diff --git a/pyexcel_io/readers/__init__.py b/pyexcel_io/readers/__init__.py index 59bcfea..c597292 100644 --- a/pyexcel_io/readers/__init__.py +++ b/pyexcel_io/readers/__init__.py @@ -4,26 +4,34 @@ file readers - :copyright: (c) 2014-2017 by Onni Software Ltd. + :copyright: (c) 2014-2022 by Onni Software Ltd. 
:license: New BSD License, see LICENSE for more details """ -from pyexcel_io.plugins import IOPluginInfoChain +from pyexcel_io.plugins import IOPluginInfoChainV2 - -IOPluginInfoChain(__name__).add_a_reader( - relative_plugin_class_path="csvr.CSVBookReader", - file_types=["csv"], +IOPluginInfoChainV2(__name__).add_a_reader( + relative_plugin_class_path="csv_in_file.FileReader", + locations=["file"], + file_types=["csv", "tsv"], stream_type="text", ).add_a_reader( - relative_plugin_class_path="tsv.TSVBookReader", - file_types=["tsv"], + relative_plugin_class_path="csv_content.ContentReader", + locations=["content"], + file_types=["csv", "tsv"], stream_type="text", ).add_a_reader( - relative_plugin_class_path="csvz.CSVZipBookReader", - file_types=["csvz"], + relative_plugin_class_path="csv_in_memory.MemoryReader", + locations=["memory"], + file_types=["csv", "tsv"], + stream_type="text", +).add_a_reader( + relative_plugin_class_path="csvz.FileReader", + file_types=["csvz", "tsvz"], + locations=["file", "memory"], stream_type="binary", ).add_a_reader( - relative_plugin_class_path="tsvz.TSVZipBookReader", - file_types=["tsvz"], + relative_plugin_class_path="csvz.ContentReader", + file_types=["csvz", "tsvz"], + locations=["content"], stream_type="binary", ) diff --git a/pyexcel_io/readers/csv_content.py b/pyexcel_io/readers/csv_content.py new file mode 100644 index 0000000..21a96c1 --- /dev/null +++ b/pyexcel_io/readers/csv_content.py @@ -0,0 +1,27 @@ +import mmap + +from pyexcel_io.book import _convert_content_to_stream +from pyexcel_io.readers.csv_sheet import CSVMemoryMapIterator +from pyexcel_io.readers.csv_in_memory import MemoryReader + + +class ContentReader(MemoryReader): + def __init__(self, file_content, file_type, **keywords): + file_stream = ContentReader.convert_content_to_stream( + file_content, file_type, **keywords + ) + super().__init__(file_stream, file_type, **keywords) + + @staticmethod + def convert_content_to_stream(file_content, file_type, 
**keywords): + encoding = keywords.get("encoding", "utf-8") + if isinstance(file_content, mmap.mmap): + # load from mmap + file_stream = CSVMemoryMapIterator(file_content, encoding) + else: + if isinstance(file_content, bytes): + file_content = file_content.decode(encoding) + + file_stream = _convert_content_to_stream(file_content, file_type) + + return file_stream diff --git a/pyexcel_io/readers/csv_in_file.py b/pyexcel_io/readers/csv_in_file.py new file mode 100644 index 0000000..b926242 --- /dev/null +++ b/pyexcel_io/readers/csv_in_file.py @@ -0,0 +1,64 @@ +import os +import re +import glob + +from pyexcel_io import constants +from pyexcel_io.sheet import NamedContent +from pyexcel_io.plugin_api import IReader +from pyexcel_io.readers.csv_sheet import CSVFileReader + +DEFAULT_NEWLINE = "\r\n" + + +class FileReader(IReader): + def __init__(self, file_name, file_type, **keywords): + """Load content from a file + :params str filename: an accessible file path + :returns: a book + """ + self.handles = [] + self.keywords = keywords + if file_type == constants.FILE_FORMAT_TSV: + self.keywords["dialect"] = constants.KEYWORD_TSV_DIALECT + self.__line_terminator = keywords.get( + constants.KEYWORD_LINE_TERMINATOR, DEFAULT_NEWLINE + ) + names = os.path.splitext(file_name) + filepattern = "%s%s*%s*%s" % ( + names[0], + constants.DEFAULT_MULTI_CSV_SEPARATOR, + constants.DEFAULT_MULTI_CSV_SEPARATOR, + names[1], + ) + filelist = glob.glob(filepattern) + if len(filelist) == 0: + file_parts = os.path.split(file_name) + self.content_array = [NamedContent(file_parts[-1], file_name)] + + else: + matcher = "%s%s(.*)%s(.*)%s" % ( + names[0], + constants.DEFAULT_MULTI_CSV_SEPARATOR, + constants.DEFAULT_MULTI_CSV_SEPARATOR, + names[1], + ) + tmp_file_list = [] + for filen in filelist: + result = re.match(matcher, filen) + tmp_file_list.append((result.group(1), result.group(2), filen)) + ret = [] + for lsheetname, index, filen in sorted( + tmp_file_list, key=lambda row: row[1] + ): + 
ret.append(NamedContent(lsheetname, filen)) + self.content_array = ret + + def read_sheet(self, index): + reader = CSVFileReader(self.content_array[index], **self.keywords) + self.handles.append(reader) + return reader + + def close(self): + for reader in self.handles: + reader.close() + self.handles = [] diff --git a/pyexcel_io/readers/csv_in_memory.py b/pyexcel_io/readers/csv_in_memory.py new file mode 100644 index 0000000..eb8ba1d --- /dev/null +++ b/pyexcel_io/readers/csv_in_memory.py @@ -0,0 +1,62 @@ +import re + +import pyexcel_io._compact as compact +from pyexcel_io import constants +from pyexcel_io.sheet import NamedContent +from pyexcel_io.plugin_api import IReader +from pyexcel_io.readers.csv_sheet import CSVinMemoryReader + +DEFAULT_SHEET_SEPARATOR_FORMATTER = f"---{constants.DEFAULT_NAME}---%s" + + +class MemoryReader(IReader): + def __init__( + self, file_stream, file_type, multiple_sheets=False, **keywords + ): + """Load content from memory + :params stream file_content: the actual file content in memory + :returns: a book + """ + self.handles = [] + self.keywords = keywords + if file_type == constants.FILE_FORMAT_TSV: + self.keywords["dialect"] = constants.KEYWORD_TSV_DIALECT + self.file_type = file_type + + self.__load_from_memory_flag = True + self.__line_terminator = keywords.get( + constants.KEYWORD_LINE_TERMINATOR, constants.DEFAULT_CSV_NEWLINE + ) + separator = DEFAULT_SHEET_SEPARATOR_FORMATTER % self.__line_terminator + if multiple_sheets: + # will be slow for large files + file_stream.seek(0) + content = file_stream.read() + sheets = content.split(separator) + named_contents = [] + for sheet in sheets: + if sheet == "": # skip empty named sheet + continue + + lines = sheet.split(self.__line_terminator) + result = re.match(constants.SEPARATOR_MATCHER, lines[0]) + new_content = "\n".join(lines[1:]) + new_sheet = NamedContent( + result.group(1), compact.StringIO(new_content) + ) + named_contents.append(new_sheet) + self.content_array = 
named_contents + + else: + if hasattr(file_stream, "seek"): + file_stream.seek(0) + self.content_array = [NamedContent(self.file_type, file_stream)] + + def read_sheet(self, index): + reader = CSVinMemoryReader(self.content_array[index], **self.keywords) + self.handles.append(reader) + return reader + + def close(self): + for reader in self.handles: + reader.close() diff --git a/pyexcel_io/readers/csv_sheet.py b/pyexcel_io/readers/csv_sheet.py new file mode 100644 index 0000000..fd6f76c --- /dev/null +++ b/pyexcel_io/readers/csv_sheet.py @@ -0,0 +1,192 @@ +""" + pyexcel_io.readers.csv_sheet + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + csv file reader + + :copyright: (c) 2014-2022 by Onni Software Ltd. + :license: New BSD License, see LICENSE for more details +""" +import csv + +import pyexcel_io.service as service +import pyexcel_io._compact as compact +import pyexcel_io.constants as constants +from pyexcel_io.plugin_api import ISheet + +DEFAULT_SEPARATOR = "__" +DEFAULT_SHEET_SEPARATOR_FORMATTER = "---%s---" % constants.DEFAULT_NAME + "%s" +SEPARATOR_MATCHER = "---%s:(.*)---" % constants.DEFAULT_NAME +DEFAULT_CSV_STREAM_FILE_FORMATTER = ( + "---%s:" % constants.DEFAULT_NAME + "%s---%s" +) +DEFAULT_NEWLINE = "\r\n" +BOM_LITTLE_ENDIAN = b"\xff\xfe" +BOM_BIG_ENDIAN = b"\xfe\ff" +LITTLE_ENDIAN = 0 +BIG_ENDIAN = 1 + + +class CSVMemoryMapIterator(object): + """ + Wrapper class for mmap object + + mmap object does not handle encoding at all. This class + provide the necessary transcoding for utf-8, utf-16 and utf-32 + """ + + def __init__(self, mmap_obj, encoding): + self.__mmap_obj = mmap_obj + self.__encoding = encoding + self.__count = 0 + self.__endian = LITTLE_ENDIAN + if encoding == "utf-8": + # ..\r\x00\n + # \x00\x.. + self.__zeros_left_in_2_row = 0 + elif encoding == "utf-16": + # ..\r\x00\n + # \x00\x.. + self.__zeros_left_in_2_row = 1 + elif encoding == "utf-32": + # \r\x00\x00\x00\n + # \x00\x00\x00\x.. 
+ self.__zeros_left_in_2_row = 3 + elif encoding in ["utf-32-be", "utf-16-be"]: + self.__zeros_left_in_2_row = 0 + self.__endian = BIG_ENDIAN + elif encoding == "utf-32-le": + self.__zeros_left_in_2_row = 3 + self.__endian = LITTLE_ENDIAN + elif encoding == "utf-16-le": + self.__zeros_left_in_2_row = 1 + self.__endian = LITTLE_ENDIAN + else: + raise Exception("Encoding %s is not supported" % encoding) + + def __iter__(self): + return self + + def __next__(self): + line = self.__mmap_obj.readline() + if self.__count == 0: + utf_16_32 = ( + self.__encoding == "utf-16" or self.__encoding == "utf-32" + ) + if utf_16_32: + bom_header = line[:2] + if bom_header == BOM_BIG_ENDIAN: + self.__endian = BIG_ENDIAN + elif self.__endian == LITTLE_ENDIAN: + line = line[self.__zeros_left_in_2_row :] # noqa: E203 + if self.__endian == LITTLE_ENDIAN: + line = line.rstrip() + line = line.decode(self.__encoding) + self.__count += 1 + if line == "": + raise StopIteration + + return line + + def close(self): + pass + + +class CSVSheetReader(ISheet): + """generic csv file reader""" + + def __init__( + self, + sheet, + encoding="utf-8", + auto_detect_float=True, + ignore_infinity=True, + auto_detect_int=True, + auto_detect_datetime=True, + pep_0515_off=True, + ignore_nan_text=False, + default_float_nan=None, + **keywords + ): + self._native_sheet = sheet + self._encoding = encoding + self.__auto_detect_int = auto_detect_int + self.__auto_detect_float = auto_detect_float + self.__ignore_infinity = ignore_infinity + self.__auto_detect_datetime = auto_detect_datetime + self.__file_handle = None + self.__pep_0515_off = pep_0515_off + self.__ignore_nan_text = ignore_nan_text + self.__default_float_nan = default_float_nan + self._keywords = keywords + + def get_file_handle(self): + """return me unicde reader for csv""" + raise NotImplementedError("Please implement get_file_handle()") + + def row_iterator(self): + self.__file_handle = self.get_file_handle() + return 
csv.reader(self.__file_handle, **self._keywords) + + def column_iterator(self, row): + for element in row: + if element is not None and element != "": + element = self.__convert_cell(element) + yield element + + def __convert_cell(self, csv_cell_text): + ret = None + if self.__auto_detect_int: + ret = service.detect_int_value(csv_cell_text, self.__pep_0515_off) + if ret is None and self.__auto_detect_float: + ret = service.detect_float_value( + csv_cell_text, + self.__pep_0515_off, + ignore_nan_text=self.__ignore_nan_text, + default_float_nan=self.__default_float_nan, + ) + shall_we_ignore_the_conversion = ( + ret in [float("inf"), float("-inf")] + ) and self.__ignore_infinity + if shall_we_ignore_the_conversion: + ret = None + if ret is None and self.__auto_detect_datetime: + ret = service.detect_date_value(csv_cell_text) + if ret is None: + ret = csv_cell_text + return ret + + def close(self): + if self.__file_handle: + self.__file_handle.close() + + +# else: means the generator has been run +# yes, no run, no file open. + + +class CSVFileReader(CSVSheetReader): + """read csv from phyical file""" + + def get_file_handle(self): + unicode_reader = open( + self._native_sheet.payload, "r", encoding=self._encoding + ) + return unicode_reader + + +class CSVinMemoryReader(CSVSheetReader): + """read csv file from memory""" + + def get_file_handle(self): + if isinstance(self._native_sheet.payload, compact.BytesIO): + # please note that + # if the end developer feed us bytesio in python3 + # we will do the conversion to StriongIO but that + # comes at a cost. 
+ content = self._native_sheet.payload.read() + unicode_reader = compact.StringIO(content.decode(self._encoding)) + else: + unicode_reader = self._native_sheet.payload + + return unicode_reader diff --git a/pyexcel_io/readers/csvr.py b/pyexcel_io/readers/csvr.py deleted file mode 100644 index 6a1472a..0000000 --- a/pyexcel_io/readers/csvr.py +++ /dev/null @@ -1,369 +0,0 @@ -""" - pyexcel_io.readers.csvr - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - csv file reader - - :copyright: (c) 2014-2017 by Onni Software Ltd. - :license: New BSD License, see LICENSE for more details -""" -import re -import os -import csv -import glob -import codecs - -from pyexcel_io.book import BookReader -from pyexcel_io.sheet import SheetReader, NamedContent -import pyexcel_io._compact as compact -import pyexcel_io.constants as constants -import pyexcel_io.service as service - - -DEFAULT_SEPARATOR = "__" -DEFAULT_SHEET_SEPARATOR_FORMATTER = "---%s---" % constants.DEFAULT_NAME + "%s" -SEPARATOR_MATCHER = "---%s:(.*)---" % constants.DEFAULT_NAME -DEFAULT_CSV_STREAM_FILE_FORMATTER = ( - "---%s:" % constants.DEFAULT_NAME + "%s---%s" -) -DEFAULT_NEWLINE = "\r\n" -BOM_LITTLE_ENDIAN = b"\xff\xfe" -BOM_BIG_ENDIAN = b"\xfe\ff" -LITTLE_ENDIAN = 0 -BIG_ENDIAN = 1 - - -class CSVMemoryMapIterator(compact.Iterator): - """ - Wrapper class for mmap object - - mmap object does not handle encoding at all. This class - provide the necessary transcoding for utf-8, utf-16 and utf-32 - """ - - def __init__(self, mmap_obj, encoding): - self.__mmap_obj = mmap_obj - self.__encoding = encoding - self.__count = 0 - self.__endian = LITTLE_ENDIAN - if encoding == "utf-8": - # ..\r\x00\n - # \x00\x.. - self.__zeros_left_in_2_row = 0 - elif encoding == "utf-16": - # ..\r\x00\n - # \x00\x.. - self.__zeros_left_in_2_row = 1 - elif encoding == "utf-32": - # \r\x00\x00\x00\n - # \x00\x00\x00\x.. 
- self.__zeros_left_in_2_row = 3 - elif encoding == "utf-32-be" or encoding == "utf-16-be": - self.__zeros_left_in_2_row = 0 - self.__endian = BIG_ENDIAN - elif encoding == "utf-32-le": - self.__zeros_left_in_2_row = 3 - self.__endian = LITTLE_ENDIAN - elif encoding == "utf-16-le": - self.__zeros_left_in_2_row = 1 - self.__endian = LITTLE_ENDIAN - else: - raise Exception("Encoding %s is not supported" % encoding) - - def __iter__(self): - return self - - def __next__(self): - line = self.__mmap_obj.readline() - if self.__count == 0: - utf_16_32 = ( - self.__encoding == "utf-16" or self.__encoding == "utf-32" - ) - if utf_16_32: - bom_header = line[:2] - if bom_header == BOM_BIG_ENDIAN: - self.__endian = BIG_ENDIAN - elif self.__endian == LITTLE_ENDIAN: - line = line[self.__zeros_left_in_2_row :] # noqa: E203 - if self.__endian == LITTLE_ENDIAN: - line = line.rstrip() - line = line.decode(self.__encoding) - self.__count += 1 - if line == "": - raise StopIteration - - if compact.PY2: - # python 2 requires utf-8 encoded string for reading - line = line.encode("utf-8") - return line - - -class UTF8Recorder(compact.Iterator): - """ - Iterator that reads an encoded stream and reencodes the input to UTF-8. 
- """ - - def __init__(self, file_handle, encoding): - self.__file_handle = file_handle - self.reader = codecs.getreader(encoding)(file_handle) - - def close(self): - self.__file_handle.close() - - def __iter__(self): - return self - - def __next__(self): - # python 2 requires utf-8 encoded string for reading - line = next(self.reader).encode("utf-8") - return line - - -class CSVSheetReader(SheetReader): - """ generic csv file reader""" - - def __init__( - self, - sheet, - encoding="utf-8", - auto_detect_float=True, - ignore_infinity=True, - auto_detect_int=True, - auto_detect_datetime=True, - pep_0515_off=True, - ignore_nan_text=False, - default_float_nan=None, - **keywords - ): - SheetReader.__init__(self, sheet, **keywords) - self._encoding = encoding - self.__auto_detect_int = auto_detect_int - self.__auto_detect_float = auto_detect_float - self.__ignore_infinity = ignore_infinity - self.__auto_detect_datetime = auto_detect_datetime - self.__file_handle = None - self.__pep_0515_off = pep_0515_off - self.__ignore_nan_text = ignore_nan_text - self.__default_float_nan = default_float_nan - - def get_file_handle(self): - """ return me unicde reader for csv """ - raise NotImplementedError("Please implement get_file_handle()") - - def row_iterator(self): - self.__file_handle = self.get_file_handle() - return csv.reader(self.__file_handle, **self._keywords) - - def column_iterator(self, row): - for element in row: - if compact.PY2: - element = element.decode("utf-8") - if element is not None and element != "": - element = self.__convert_cell(element) - yield element - - def __convert_cell(self, csv_cell_text): - ret = None - if self.__auto_detect_int: - ret = service.detect_int_value(csv_cell_text, self.__pep_0515_off) - if ret is None and self.__auto_detect_float: - ret = service.detect_float_value( - csv_cell_text, - self.__pep_0515_off, - ignore_nan_text=self.__ignore_nan_text, - default_float_nan=self.__default_float_nan, - ) - shall_we_ignore_the_conversion = ( - 
ret in [float("inf"), float("-inf")] - ) and self.__ignore_infinity - if shall_we_ignore_the_conversion: - ret = None - if ret is None and self.__auto_detect_datetime: - ret = service.detect_date_value(csv_cell_text) - if ret is None: - ret = csv_cell_text - return ret - - def close(self): - if self.__file_handle: - self.__file_handle.close() - - -# else: means the generator has been run -# yes, no run, no file open. - - -class CSVFileReader(CSVSheetReader): - """ read csv from phyical file """ - - def get_file_handle(self): - unicode_reader = None - if compact.PY2: - file_handle = open(self._native_sheet.payload, "rb") - unicode_reader = UTF8Recorder(file_handle, self._encoding) - else: - unicode_reader = open( - self._native_sheet.payload, "r", encoding=self._encoding - ) - return unicode_reader - - -class CSVinMemoryReader(CSVSheetReader): - """ read csv file from memory """ - - def get_file_handle(self): - unicode_reader = None - if compact.PY2: - if hasattr(self._native_sheet.payload, "read"): - unicode_reader = UTF8Recorder( - self._native_sheet.payload, self._encoding - ) - else: - unicode_reader = self._native_sheet.payload - else: - if isinstance(self._native_sheet.payload, compact.BytesIO): - # please note that - # if the end developer feed us bytesio in python3 - # we will do the conversion to StriongIO but that - # comes at a cost. 
- content = self._native_sheet.payload.read() - unicode_reader = compact.StringIO( - content.decode(self._encoding) - ) - else: - unicode_reader = self._native_sheet.payload - - return unicode_reader - - -class CSVBookReader(BookReader): - """ read csv file """ - - def __init__(self): - BookReader.__init__(self) - self._file_type = constants.FILE_FORMAT_CSV - self._file_content = None - self.__load_from_memory_flag = False - self.__line_terminator = constants.DEFAULT_CSV_NEWLINE - self.__sheet_name = None - self.__sheet_index = None - self.__multiple_sheets = False - self.__readers = [] - - def open(self, file_name, **keywords): - BookReader.open(self, file_name, **keywords) - self._native_book = self._load_from_file() - - def open_stream(self, file_stream, multiple_sheets=False, **keywords): - BookReader.open_stream(self, file_stream, **keywords) - self.__multiple_sheets = multiple_sheets - self._native_book = self._load_from_stream() - - def open_content(self, file_content, **keywords): - try: - import mmap - - encoding = keywords.get("encoding", "utf-8") - if isinstance(file_content, mmap.mmap): - # load from mmap - self.__multiple_sheets = keywords.get("multiple_sheets", False) - self._file_stream = CSVMemoryMapIterator( - file_content, encoding - ) - self._keywords = keywords - self._native_book = self._load_from_stream() - else: - if compact.PY3_ABOVE: - if isinstance(file_content, bytes): - file_content = file_content.decode(encoding) - # else python 2.7 does not care about bytes nor str - BookReader.open_content(self, file_content, **keywords) - except ImportError: - # python 2.6 or Google app engine - BookReader.open_content(self, file_content, **keywords) - - def read_sheet(self, native_sheet): - if self.__load_from_memory_flag: - reader = CSVinMemoryReader(native_sheet, **self._keywords) - else: - reader = CSVFileReader(native_sheet, **self._keywords) - self.__readers.append(reader) - return reader.to_array() - - def close(self): - for reader in 
self.__readers: - reader.close() - - def _load_from_stream(self): - """Load content from memory - - :params stream file_content: the actual file content in memory - :returns: a book - """ - self.__load_from_memory_flag = True - self.__line_terminator = self._keywords.get( - constants.KEYWORD_LINE_TERMINATOR, self.__line_terminator - ) - separator = DEFAULT_SHEET_SEPARATOR_FORMATTER % self.__line_terminator - if self.__multiple_sheets: - # will be slow for large files - self._file_stream.seek(0) - content = self._file_stream.read() - sheets = content.split(separator) - named_contents = [] - for sheet in sheets: - if sheet == "": # skip empty named sheet - continue - - lines = sheet.split(self.__line_terminator) - result = re.match(constants.SEPARATOR_MATCHER, lines[0]) - new_content = "\n".join(lines[1:]) - new_sheet = NamedContent( - result.group(1), compact.StringIO(new_content) - ) - named_contents.append(new_sheet) - return named_contents - - else: - if hasattr(self._file_stream, "seek"): - self._file_stream.seek(0) - return [NamedContent(self._file_type, self._file_stream)] - - def _load_from_file(self): - """Load content from a file - - :params str filename: an accessible file path - :returns: a book - """ - self.__line_terminator = self._keywords.get( - constants.KEYWORD_LINE_TERMINATOR, self.__line_terminator - ) - names = os.path.splitext(self._file_name) - filepattern = "%s%s*%s*%s" % ( - names[0], - constants.DEFAULT_MULTI_CSV_SEPARATOR, - constants.DEFAULT_MULTI_CSV_SEPARATOR, - names[1], - ) - filelist = glob.glob(filepattern) - if len(filelist) == 0: - file_parts = os.path.split(self._file_name) - return [NamedContent(file_parts[-1], self._file_name)] - - else: - matcher = "%s%s(.*)%s(.*)%s" % ( - names[0], - constants.DEFAULT_MULTI_CSV_SEPARATOR, - constants.DEFAULT_MULTI_CSV_SEPARATOR, - names[1], - ) - tmp_file_list = [] - for filen in filelist: - result = re.match(matcher, filen) - tmp_file_list.append((result.group(1), result.group(2), filen)) - 
ret = [] - for lsheetname, index, filen in sorted( - tmp_file_list, key=lambda row: row[1] - ): - ret.append(NamedContent(lsheetname, filen)) - return ret diff --git a/pyexcel_io/readers/csvz.py b/pyexcel_io/readers/csvz.py index bfb1956..cae0383 100644 --- a/pyexcel_io/readers/csvz.py +++ b/pyexcel_io/readers/csvz.py @@ -4,69 +4,56 @@ The lower level csvz file format handler. - :copyright: (c) 2014-2017 by Onni Software Ltd. + :copyright: (c) 2014-2022 by Onni Software Ltd. :license: New BSD License, see LICENSE for more details """ import zipfile +from io import BytesIO -from pyexcel_io._compact import StringIO, PY2 -from pyexcel_io.book import BookReader -from pyexcel_io.constants import FILE_FORMAT_CSVZ - -from .csvr import CSVinMemoryReader, NamedContent +import chardet +from pyexcel_io import constants +from pyexcel_io.sheet import NamedContent +from pyexcel_io._compact import StringIO +from pyexcel_io.readers.csv_sheet import CSVinMemoryReader +from pyexcel_io.plugin_api.abstract_reader import IReader -class CSVZipBookReader(BookReader): - """csvz reader - - Read zipped csv file that was zipped up by pyexcel-io. It support - single csv file and multiple csv files. 
- """ - - def __init__(self): - BookReader.__init__(self) - self._file_type = FILE_FORMAT_CSVZ - self.zipfile = None - - def open(self, file_name, **keywords): - BookReader.open(self, file_name, **keywords) - self._native_book = self._load_from_file_alike_object(self._file_name) - - def open_stream(self, file_stream, **keywords): - BookReader.open_stream(self, file_stream, **keywords) - self._native_book = self._load_from_file_alike_object( - self._file_stream - ) - - def read_sheet(self, native_sheet): - content = self.zipfile.read(native_sheet.payload) - if PY2: - sheet = StringIO(content) - else: - sheet = StringIO(content.decode("utf-8")) - - reader = CSVinMemoryReader( - NamedContent(native_sheet.name, sheet), **self._keywords - ) - return reader.to_array() - - def close(self): - if self.zipfile: - self.zipfile.close() - - def _load_from_file_alike_object(self, file_alike_object): +class FileReader(IReader): + def __init__(self, file_alike_object, file_type, **keywords): + self.content_array = [] try: self.zipfile = zipfile.ZipFile(file_alike_object, "r") sheets = [ NamedContent(_get_sheet_name(name), name) for name in self.zipfile.namelist() ] - return sheets + self.content_array = sheets + self.keywords = keywords + if file_type == constants.FILE_FORMAT_TSVZ: + self.keywords["dialect"] = constants.KEYWORD_TSV_DIALECT except zipfile.BadZipfile: print("StringIO instance was passed by any chance?") raise + def close(self): + if self.zipfile: + self.zipfile.close() + + def read_sheet(self, index): + name = self.content_array[index].name + content = self.zipfile.read(self.content_array[index].payload) + encoding_guess = chardet.detect(content) + sheet = StringIO(content.decode(encoding_guess["encoding"])) + + return CSVinMemoryReader(NamedContent(name, sheet), **self.keywords) + + +class ContentReader(FileReader): + def __init__(self, file_content, file_type, **keywords): + io = BytesIO(file_content) + super().__init__(io, file_type, **keywords) + def 
_get_sheet_name(filename): len_of_a_dot = 1 diff --git a/pyexcel_io/readers/tsv.py b/pyexcel_io/readers/tsv.py deleted file mode 100644 index 8ac135a..0000000 --- a/pyexcel_io/readers/tsv.py +++ /dev/null @@ -1,27 +0,0 @@ -""" - pyexcel_io.readers.tsv - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - The lower level tsv file format handler. - - :copyright: (c) 2014-2017 by Onni Software Ltd. - :license: New BSD License, see LICENSE for more details -""" -import pyexcel_io.constants as constants -from .csvr import CSVBookReader - - -class TSVBookReader(CSVBookReader): - """ Read tab separated values """ - - def __init__(self): - CSVBookReader.__init__(self) - self._file_type = constants.FILE_FORMAT_TSV - - def open(self, file_name, **keywords): - keywords["dialect"] = constants.KEYWORD_TSV_DIALECT - CSVBookReader.open(self, file_name, **keywords) - - def open_stream(self, file_content, **keywords): - keywords["dialect"] = constants.KEYWORD_TSV_DIALECT - CSVBookReader.open_stream(self, file_content, **keywords) diff --git a/pyexcel_io/readers/tsvz.py b/pyexcel_io/readers/tsvz.py deleted file mode 100644 index 9dc7d6d..0000000 --- a/pyexcel_io/readers/tsvz.py +++ /dev/null @@ -1,31 +0,0 @@ -""" - pyexcel_io.fileformat.tsvz - ~~~~~~~~~~~~~~~~~~~~~~~~~~ - - The lower level tsvz file format handler. - - :copyright: (c) 2014-2017 by Onni Software Ltd. 
- :license: New BSD License, see LICENSE for more details -""" -from pyexcel_io.constants import FILE_FORMAT_TSVZ, KEYWORD_TSV_DIALECT - -from .csvz import CSVZipBookReader - - -class TSVZipBookReader(CSVZipBookReader): - """ read zipped tab separated value file - - it supports single tsv file and mulitple tsv files - """ - - def __init__(self): - CSVZipBookReader.__init__(self) - self._file_type = FILE_FORMAT_TSVZ - - def open(self, file_name, **keywords): - keywords["dialect"] = KEYWORD_TSV_DIALECT - CSVZipBookReader.open(self, file_name, **keywords) - - def open_stream(self, file_content, **keywords): - keywords["dialect"] = KEYWORD_TSV_DIALECT - CSVZipBookReader.open_stream(self, file_content, **keywords) diff --git a/pyexcel_io/service.py b/pyexcel_io/service.py index fc562bb..958ee1e 100644 --- a/pyexcel_io/service.py +++ b/pyexcel_io/service.py @@ -4,16 +4,14 @@ provide service code to downstream projects - :copyright: (c) 2014-2017 by Onni Software Ltd. + :copyright: (c) 2014-2022 by Onni Software Ltd. 
:license: New BSD License, see LICENSE for more details """ import re import math import datetime -from pyexcel_io._compact import PY2 -from pyexcel_io import constants -from pyexcel_io import exceptions +from pyexcel_io import constants, exceptions def has_no_digits_in_float(value): @@ -175,13 +173,10 @@ ODS_WRITE_FORMAT_COVERSION = { datetime.date: "date", datetime.time: "time", datetime.timedelta: "timedelta", + datetime.datetime: "datetime", bool: "boolean", } -if PY2: - ODS_WRITE_FORMAT_COVERSION[unicode] = "string" # noqa: F821 - ODS_WRITE_FORMAT_COVERSION[long] = "throw_exception" # noqa: F821 - VALUE_CONVERTERS = { "float": float_value, @@ -234,7 +229,7 @@ ODS_VALUE_CONVERTERS = { "boolean": ods_bool_value, "timedelta": ods_timedelta_value, "float": ods_float_value, - "throw_exception": throw_exception + "long": ods_float_value, } diff --git a/pyexcel_io/sheet.py b/pyexcel_io/sheet.py index 5d2abfb..da730e3 100644 --- a/pyexcel_io/sheet.py +++ b/pyexcel_io/sheet.py @@ -4,22 +4,13 @@ The io interface to file extensions - :copyright: (c) 2014-2017 by Onni Software Ltd. + :copyright: (c) 2014-2022 by Onni Software Ltd. 
:license: New BSD License, see LICENSE for more details """ -from pyexcel_io._compact import irange -from pyexcel_io.utils import _index_filter import pyexcel_io.constants as constants - - -class NamedContent(object): - """ - Helper class for content that does not have a name - """ - - def __init__(self, name, payload): - self.name = name - self.payload = payload +from pyexcel_io.utils import _index_filter +from pyexcel_io._compact import irange +from pyexcel_io.plugin_api import NamedContent # noqa: F401 class SheetReader(object): @@ -38,11 +29,12 @@ class SheetReader(object): skip_column_func=None, skip_empty_rows=False, row_renderer=None, - **keywords + keep_trailing_empty_cells=False, + **deprecated_use_of_keywords_here ): self._native_sheet = sheet self._keywords = {} - self._keywords.update(keywords) + self._keywords.update(deprecated_use_of_keywords_here) self._start_row = start_row self._row_limit = row_limit self._start_column = start_column @@ -51,6 +43,7 @@ class SheetReader(object): self._skip_column = _index_filter self._skip_empty_rows = skip_empty_rows self._row_renderer = row_renderer + self.keep_trailing_empty_cells = keep_trailing_empty_cells if skip_row_func: self._skip_row = skip_row_func @@ -58,8 +51,7 @@ class SheetReader(object): self._skip_column = skip_column_func def to_array(self): - """2 dimentional representation of the content - """ + """2 dimentional representation of the content""" for row_index, row in enumerate(self.row_iterator()): row_position = self._skip_row( row_index, self._start_row, self._row_limit @@ -85,10 +77,13 @@ class SheetReader(object): elif column_position == constants.STOP_ITERATION: break - tmp_row.append(cell_value) - if cell_value is not None and cell_value != "": - return_row += tmp_row - tmp_row = [] + if self.keep_trailing_empty_cells: + return_row.append(cell_value) + else: + tmp_row.append(cell_value) + if cell_value is not None and cell_value != "": + return_row += tmp_row + tmp_row = [] if 
self._skip_empty_rows and len(return_row) < 1: # we by-pass next yeild here # because it is an empty row diff --git a/pyexcel_io/utils.py b/pyexcel_io/utils.py index 1413786..f6f890d 100644 --- a/pyexcel_io/utils.py +++ b/pyexcel_io/utils.py @@ -4,36 +4,45 @@ utility functions - :copyright: (c) 2014-2017 by Onni Software Ltd. + :copyright: (c) 2014-2022 by Onni Software Ltd. :license: New BSD License, see LICENSE for more details """ import pyexcel_io.constants as constants - XLS_PLUGIN = "pyexcel-xls" XLSX_PLUGIN = "pyexcel-xlsx" ODS_PLUGIN = "pyexcel-ods" ODS3_PLUGIN = "pyexcel-ods3" +ODSR_PLUGIN = "pyexcel-odsr" +ODSW_PLUGIN = "pyexcel-odsw" +XLSXR_PLUGIN = "pyexcel-xlsxr" XLSXW_PLUGIN = "pyexcel-xlsxw" +XLSBR_PLUGIN = "pyexcel-xlsbr" +HTMLR_PLUGIN = "pyexcel-htmlr" +PDFR_PLUGIN = "pyexcel-pdfr" IO_ITSELF = "pyexcel-io" +AVAILABLE_NEW_READERS = {} AVAILABLE_READERS = { + constants.FILE_FORMAT_CSV: [IO_ITSELF], constants.FILE_FORMAT_XLS: [XLS_PLUGIN], constants.FILE_FORMAT_XLSX: [XLS_PLUGIN, XLSX_PLUGIN], constants.FILE_FORMAT_XLSM: [XLS_PLUGIN, XLSX_PLUGIN], - constants.FILE_FORMAT_ODS: [ODS_PLUGIN, ODS3_PLUGIN], - constants.FILE_FORMAT_CSV: [IO_ITSELF], + constants.FILE_FORMAT_ODS: [ODS_PLUGIN, ODS3_PLUGIN, ODSR_PLUGIN], constants.FILE_FORMAT_TSV: [IO_ITSELF], constants.FILE_FORMAT_CSVZ: [IO_ITSELF], constants.FILE_FORMAT_TSVZ: [IO_ITSELF], + constants.FILE_FORMAT_XLSB: [XLSBR_PLUGIN], + constants.FILE_FORMAT_HTML: [HTMLR_PLUGIN], + constants.FILE_FORMAT_PDF: [PDFR_PLUGIN], } AVAILABLE_WRITERS = { constants.FILE_FORMAT_XLS: [XLS_PLUGIN], constants.FILE_FORMAT_XLSX: [XLSX_PLUGIN, XLSXW_PLUGIN], constants.FILE_FORMAT_XLSM: [XLSX_PLUGIN], - constants.FILE_FORMAT_ODS: [ODS_PLUGIN, ODS3_PLUGIN], + constants.FILE_FORMAT_ODS: [ODS_PLUGIN, ODS3_PLUGIN, ODSW_PLUGIN], constants.FILE_FORMAT_CSV: [IO_ITSELF], constants.FILE_FORMAT_TSV: [IO_ITSELF], constants.FILE_FORMAT_CSVZ: [IO_ITSELF], @@ -60,10 +69,10 @@ def is_empty_array(array): def swap_empty_string_for_none(array): 
- """ replace empty string fields with None """ + """replace empty string fields with None""" def swap(value): - """ change empty string to None """ + """change empty string to None""" if value == "": return None diff --git a/pyexcel_io/writer.py b/pyexcel_io/writer.py new file mode 100644 index 0000000..5c41b7f --- /dev/null +++ b/pyexcel_io/writer.py @@ -0,0 +1,51 @@ +from pyexcel_io.plugins import NEW_WRITERS + + +class Writer(object): + def __init__(self, file_type, library=None): + self.file_type = file_type + self.library = library + self.keyboards = None + # if you know which reader class to use, this attribute allows + # you to set reader class externally. Since there is no + # so call private field in Python, I am not going to create + # useless setter and getter functions like Java. + # in pyexcel, this attribute is mainly used for testing + self.writer_class = None + + def open(self, file_name, **keywords): + if self.writer_class is None: + self.writer_class = NEW_WRITERS.get_a_plugin( + self.file_type, library=self.library, location="file" + ) + self.writer = self.writer_class(file_name, self.file_type, **keywords) + + def open_content(self, file_stream, **keywords): + if self.writer_class is None: + self.writer_class = NEW_WRITERS.get_a_plugin( + self.file_type, library=self.library, location="content" + ) + self.writer = self.writer_class( + file_stream, self.file_type, **keywords + ) + + def open_stream(self, file_stream, **keywords): + if self.writer_class is None: + self.writer_class = NEW_WRITERS.get_a_plugin( + self.file_type, library=self.library, location="memory" + ) + self.writer = self.writer_class( + file_stream, self.file_type, **keywords + ) + + def write(self, incoming_dict): + self.writer.write(incoming_dict) + + def close(self): + self.writer.close() + + def __enter__(self): + return self + + def __exit__(self, a_type, value, traceback): + self.close() diff --git a/pyexcel_io/writers/__init__.py b/pyexcel_io/writers/__init__.py index 
bc5d767..2beeaf7 100644 --- a/pyexcel_io/writers/__init__.py +++ b/pyexcel_io/writers/__init__.py @@ -4,26 +4,24 @@ file writers - :copyright: (c) 2014-2017 by Onni Software Ltd. + :copyright: (c) 2014-2022 by Onni Software Ltd. :license: New BSD License, see LICENSE for more details """ -from pyexcel_io.plugins import IOPluginInfoChain +from pyexcel_io.plugins import IOPluginInfoChainV2 - -IOPluginInfoChain(__name__).add_a_writer( - relative_plugin_class_path="csvw.CSVBookWriter", - file_types=["csv"], +IOPluginInfoChainV2(__name__).add_a_writer( + relative_plugin_class_path="csv_in_file.CsvFileWriter", + locations=["file", "content"], + file_types=["csv", "tsv"], stream_type="text", ).add_a_writer( - relative_plugin_class_path="tsv.TSVBookWriter", - file_types=["tsv"], + relative_plugin_class_path="csv_in_memory.CsvMemoryWriter", + locations=["memory"], + file_types=["csv", "tsv"], stream_type="text", ).add_a_writer( - relative_plugin_class_path="csvz.CSVZipBookWriter", - file_types=["csvz"], - stream_type="binary", -).add_a_writer( - relative_plugin_class_path="tsvz.TSVZipBookWriter", - file_types=["tsvz"], + relative_plugin_class_path="csvz_writer.CsvZipWriter", + locations=["memory", "file", "content"], + file_types=["csvz", "tsvz"], stream_type="binary", ) diff --git a/pyexcel_io/writers/csv_in_file.py b/pyexcel_io/writers/csv_in_file.py new file mode 100644 index 0000000..46b23ac --- /dev/null +++ b/pyexcel_io/writers/csv_in_file.py @@ -0,0 +1,29 @@ +from pyexcel_io import constants +from pyexcel_io.plugin_api import IWriter +from pyexcel_io.writers.csv_sheet import CSVFileWriter + + +class CsvFileWriter(IWriter): + def __init__(self, file_alike_object, file_type, **keywords): + self._file_alike_object = file_alike_object + self._keywords = keywords + if file_type == constants.FILE_FORMAT_TSV: + self._keywords["dialect"] = constants.KEYWORD_TSV_DIALECT + self.__index = 0 + self.handlers = [] + + def create_sheet(self, name): + writer = CSVFileWriter( + 
self._file_alike_object, + name, + sheet_index=self.__index, + **self._keywords + ) + self.__index = self.__index + 1 + self.handlers.append(writer) + return writer + + def close(self): + for writer in self.handlers: + writer.close() + self.handlers = [] diff --git a/pyexcel_io/writers/csv_in_memory.py b/pyexcel_io/writers/csv_in_memory.py new file mode 100644 index 0000000..e768f4c --- /dev/null +++ b/pyexcel_io/writers/csv_in_memory.py @@ -0,0 +1,26 @@ +from pyexcel_io import constants +from pyexcel_io.plugin_api import IWriter +from pyexcel_io.writers.csv_sheet import CSVMemoryWriter + + +class CsvMemoryWriter(IWriter): + def __init__(self, file_alike_object, file_type, **keywords): + self._file_alike_object = file_alike_object + self._keywords = keywords + if file_type == constants.FILE_FORMAT_TSV: + self._keywords["dialect"] = constants.KEYWORD_TSV_DIALECT + self.__index = 0 + + def create_sheet(self, name): + writer_class = CSVMemoryWriter + writer = writer_class( + self._file_alike_object, + name, + sheet_index=self.__index, + **self._keywords + ) + self.__index = self.__index + 1 + return writer + + def close(self): + pass diff --git a/pyexcel_io/writers/csv_sheet.py b/pyexcel_io/writers/csv_sheet.py new file mode 100644 index 0000000..422dbed --- /dev/null +++ b/pyexcel_io/writers/csv_sheet.py @@ -0,0 +1,98 @@ +""" + pyexcel_io.writers.csv_sheet + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + The lower level csv file format writer + + :copyright: (c) 2014-2022 by Onni Software Ltd. 
+ :license: New BSD License, see LICENSE for more details +""" +import csv + +import pyexcel_io.constants as constants +from pyexcel_io.plugin_api import ISheetWriter + + +class CSVFileWriter(ISheetWriter): + """ + csv file writer + + """ + + def __init__( + self, + filename, + name, + encoding="utf-8", + single_sheet_in_book=False, + sheet_index=None, + **keywords + ): + self._encoding = encoding + self._sheet_name = name + if self._sheet_name is None or single_sheet_in_book: + self._sheet_name = constants.DEFAULT_SHEET_NAME + self._single_sheet_in_book = single_sheet_in_book + self.__line_terminator = constants.DEFAULT_CSV_NEWLINE + self._keywords = keywords + if constants.KEYWORD_LINE_TERMINATOR in keywords: + self.__line_terminator = keywords.get( + constants.KEYWORD_LINE_TERMINATOR + ) + self._sheet_index = sheet_index + self.file_handle = None + self._native_book = filename + + self.writer = self.get_writer() + + def get_writer(self): + if self._sheet_name != constants.DEFAULT_SHEET_NAME: + names = self._native_book.split(".") + file_name = "%s%s%s%s%s.%s" % ( + names[0], + constants.DEFAULT_MULTI_CSV_SEPARATOR, + self._sheet_name, # sheet name + constants.DEFAULT_MULTI_CSV_SEPARATOR, + self._sheet_index, # sheet index + names[1], + ) + else: + file_name = self._native_book + + self.file_handle = open( + file_name, "w", newline="", encoding=self._encoding + ) + return csv.writer(self.file_handle, **self._keywords) + + def write_row(self, array): + """ + write a row into the file + """ + self.writer.writerow(array) + + def close(self): + self.file_handle.close() + + +class CSVMemoryWriter(CSVFileWriter): + """Write csv to a memory stream""" + + def get_writer(self): + self.file_handle = self._native_book + writer = csv.writer(self.file_handle, **self._keywords) + if not self._single_sheet_in_book: + writer.writerow( + [ + constants.DEFAULT_CSV_STREAM_FILE_FORMATTER + % (self._sheet_name, "") + ] + ) + return writer + + def close(self): + if 
self._single_sheet_in_book: + # on purpose, the this is not done + # because the io stream can be used later + pass + else: + self.writer.writerow([constants.SEPARATOR_FORMATTER % ""]) diff --git a/pyexcel_io/writers/csvw.py b/pyexcel_io/writers/csvw.py deleted file mode 100644 index f9cd7bb..0000000 --- a/pyexcel_io/writers/csvw.py +++ /dev/null @@ -1,192 +0,0 @@ -""" - pyexcel_io.writers.csvw - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - The lower level csv file format writer - - :copyright: (c) 2014-2017 by Onni Software Ltd. - :license: New BSD License, see LICENSE for more details -""" -import csv -import codecs - -from pyexcel_io.book import BookWriter -from pyexcel_io.sheet import SheetWriter -import pyexcel_io._compact as compact -import pyexcel_io.constants as constants - - -class UnicodeWriter(object): - """ - A CSV writer which will write rows to CSV file "f", - which is encoded in the given encoding. - """ - - def __init__(self, file_handle, encoding="utf-8", **kwds): - # Redirect output to a queue - self.queue = compact.StringIO() - self.writer = csv.writer(self.queue, **kwds) - self.stream = file_handle - self.encoder = codecs.getincrementalencoder(encoding)() - - def writerow(self, row): - """ write row into the csv file """ - self.writer.writerow( - [compact.text_type(s).encode("utf-8") for s in row] - ) - # Fetch UTF-8 output from the queue ... - data = self.queue.getvalue() - data = data.decode("utf-8") - # ... 
and reencode it into the target encoding - data = self.encoder.encode(data) - # write to the target stream - self.stream.write(data) - # empty queue - self.queue.truncate(0) - - def writerows(self, rows): - """ write multiple rows into csv file """ - for row in rows: - self.writerow(row) - - -class CSVSheetWriter(SheetWriter): - """ - csv file writer - - """ - - def __init__( - self, - filename, - name, - encoding="utf-8", - single_sheet_in_book=False, - sheet_index=None, - **keywords - ): - self._encoding = encoding - self._sheet_name = name - self._single_sheet_in_book = single_sheet_in_book - self.__line_terminator = constants.DEFAULT_CSV_NEWLINE - if constants.KEYWORD_LINE_TERMINATOR in keywords: - self.__line_terminator = keywords.get( - constants.KEYWORD_LINE_TERMINATOR - ) - if single_sheet_in_book: - self._sheet_name = None - self._sheet_index = sheet_index - self.writer = None - self.file_handle = None - SheetWriter.__init__( - self, filename, self._sheet_name, self._sheet_name, **keywords - ) - - def write_row(self, array): - """ - write a row into the file - """ - self.writer.writerow(array) - - -class CSVFileWriter(CSVSheetWriter): - """ Write csv to a physical file """ - - def close(self): - self.file_handle.close() - - def set_sheet_name(self, name): - if name != constants.DEFAULT_SHEET_NAME: - names = self._native_book.split(".") - file_name = "%s%s%s%s%s.%s" % ( - names[0], - constants.DEFAULT_MULTI_CSV_SEPARATOR, - name, # sheet name - constants.DEFAULT_MULTI_CSV_SEPARATOR, - self._sheet_index, # sheet index - names[1], - ) - else: - file_name = self._native_book - if compact.PY2: - self.file_handle = open(file_name, "wb") - self.writer = UnicodeWriter( - self.file_handle, encoding=self._encoding, **self._keywords - ) - else: - self.file_handle = open( - file_name, "w", newline="", encoding=self._encoding - ) - self.writer = csv.writer(self.file_handle, **self._keywords) - - -class CSVMemoryWriter(CSVSheetWriter): - """ Write csv to a memory stream 
""" - - def __init__( - self, - filename, - name, - encoding="utf-8", - single_sheet_in_book=False, - sheet_index=None, - **keywords - ): - CSVSheetWriter.__init__( - self, - filename, - name, - encoding=encoding, - single_sheet_in_book=single_sheet_in_book, - sheet_index=sheet_index, - **keywords - ) - - def set_sheet_name(self, name): - if compact.PY2: - self.file_handle = self._native_book - self.writer = UnicodeWriter( - self.file_handle, encoding=self._encoding, **self._keywords - ) - else: - self.file_handle = self._native_book - self.writer = csv.writer(self.file_handle, **self._keywords) - if not self._single_sheet_in_book: - self.writer.writerow( - [ - constants.DEFAULT_CSV_STREAM_FILE_FORMATTER - % (self._sheet_name, "") - ] - ) - - def close(self): - if self._single_sheet_in_book: - # on purpose, the this is not done - # because the io stream can be used later - pass - else: - self.writer.writerow([constants.SEPARATOR_FORMATTER % ""]) - - -class CSVBookWriter(BookWriter): - """ write csv with unicode support """ - - def __init__(self): - BookWriter.__init__(self) - self._file_type = constants.FILE_FORMAT_CSV - self.__index = 0 - - def create_sheet(self, name): - writer_class = None - if compact.is_string(type(self._file_alike_object)): - writer_class = CSVFileWriter - else: - writer_class = CSVMemoryWriter - writer = writer_class( - self._file_alike_object, - name, - sheet_index=self.__index, - **self._keywords - ) - self.__index = self.__index + 1 - return writer diff --git a/pyexcel_io/writers/csvz.py b/pyexcel_io/writers/csvz.py deleted file mode 100644 index 2eb3237..0000000 --- a/pyexcel_io/writers/csvz.py +++ /dev/null @@ -1,73 +0,0 @@ -""" - pyexcel_io.fileformat.csvz - ~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - The lower level csvz file format handler. - - :copyright: (c) 2014-2017 by Onni Software Ltd. 
- :license: New BSD License, see LICENSE for more details -""" -import zipfile - -from pyexcel_io._compact import StringIO, PY2 -from pyexcel_io.book import BookWriter -from pyexcel_io.constants import DEFAULT_SHEET_NAME, FILE_FORMAT_CSVZ - -from .csvw import CSVSheetWriter, UnicodeWriter - - -class CSVZipSheetWriter(CSVSheetWriter): - """ handle the zipfile interface """ - - def __init__(self, zipfile, sheetname, file_extension, **keywords): - self.file_extension = file_extension - keywords["single_sheet_in_book"] = False - CSVSheetWriter.__init__(self, zipfile, sheetname, **keywords) - - def set_sheet_name(self, name): - self.content = StringIO() - if PY2: - self.writer = UnicodeWriter( - self.content, encoding=self._encoding, **self._keywords - ) - else: - import csv - - self.writer = csv.writer(self.content, **self._keywords) - - def close(self): - file_name = "%s.%s" % (self._native_sheet, self.file_extension) - self.content.seek(0) - self._native_book.writestr(file_name, self.content.read()) - self.content.close() - - -class CSVZipBookWriter(BookWriter): - """ - csvz writer - - It is better to store csv files as a csvz as it saves your disk space. - Pyexcel-io had the facility to unzip it for you or you could use - any other unzip software. 
- """ - - def __init__(self): - BookWriter.__init__(self) - self._file_type = FILE_FORMAT_CSVZ - self.zipfile = None - - def open(self, file_name, **keywords): - BookWriter.open(self, file_name, **keywords) - self.zipfile = zipfile.ZipFile(file_name, "w", zipfile.ZIP_DEFLATED) - - def create_sheet(self, name): - given_name = name - if given_name is None: - given_name = DEFAULT_SHEET_NAME - writer = CSVZipSheetWriter( - self.zipfile, given_name, self._file_type[:3], **self._keywords - ) - return writer - - def close(self): - self.zipfile.close() diff --git a/pyexcel_io/writers/csvz_sheet.py b/pyexcel_io/writers/csvz_sheet.py new file mode 100644 index 0000000..fe0cd22 --- /dev/null +++ b/pyexcel_io/writers/csvz_sheet.py @@ -0,0 +1,32 @@ +""" + pyexcel_io.fileformat.csvz_sheet + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + The lower level csvz file format handler. + + :copyright: (c) 2014-2022 by Onni Software Ltd. + :license: New BSD License, see LICENSE for more details +""" +import csv + +from pyexcel_io._compact import StringIO +from pyexcel_io.writers.csv_sheet import CSVFileWriter + + +class CSVZipSheetWriter(CSVFileWriter): + """handle the zipfile interface""" + + def __init__(self, zipfile, sheetname, file_extension, **keywords): + self.file_extension = file_extension + keywords["single_sheet_in_book"] = False + self.content = StringIO() + super().__init__(zipfile, sheetname, **keywords) + + def get_writer(self): + return csv.writer(self.content, **self._keywords) + + def close(self): + file_name = "%s.%s" % (self._sheet_name, self.file_extension) + self.content.seek(0) + self._native_book.writestr(file_name, self.content.read()) + self.content.close() diff --git a/pyexcel_io/writers/csvz_writer.py b/pyexcel_io/writers/csvz_writer.py new file mode 100644 index 0000000..7461ecc --- /dev/null +++ b/pyexcel_io/writers/csvz_writer.py @@ -0,0 +1,35 @@ +import zipfile + +from pyexcel_io import constants +from pyexcel_io.plugin_api import IWriter +from 
pyexcel_io.writers.csvz_sheet import CSVZipSheetWriter + + +class CsvZipWriter(IWriter): + """ + csvz writer + + It is better to store csv files as a csvz as it saves your disk space. + Pyexcel-io had the facility to unzip it for you or you could use + any other unzip software. + """ + + def __init__(self, file_name, file_type, **keywords): + self._file_type = file_type + self.zipfile = zipfile.ZipFile(file_name, "w", zipfile.ZIP_DEFLATED) + self._keywords = keywords + if file_type == constants.FILE_FORMAT_TSVZ: + self._keywords["dialect"] = constants.KEYWORD_TSV_DIALECT + + def create_sheet(self, name): + given_name = name + if given_name is None: + given_name = constants.DEFAULT_SHEET_NAME + writer = CSVZipSheetWriter( + self.zipfile, given_name, self._file_type[:3], **self._keywords + ) + return writer + + def close(self): + if self.zipfile: + self.zipfile.close() diff --git a/pyexcel_io/writers/tsv.py b/pyexcel_io/writers/tsv.py deleted file mode 100644 index 778372b..0000000 --- a/pyexcel_io/writers/tsv.py +++ /dev/null @@ -1,23 +0,0 @@ -""" - pyexcel_io.fileformat.tsv - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - The lower level tsv file format handler. - - :copyright: (c) 2014-2017 by Onni Software Ltd. - :license: New BSD License, see LICENSE for more details -""" -import pyexcel_io.constants as constants -from .csvw import CSVBookWriter - - -class TSVBookWriter(CSVBookWriter): - """ write tsv """ - - def __init__(self): - CSVBookWriter.__init__(self) - self._file_type = constants.FILE_FORMAT_TSV - - def open(self, file_name, **keywords): - keywords["dialect"] = constants.KEYWORD_TSV_DIALECT - CSVBookWriter.open(self, file_name, **keywords) diff --git a/pyexcel_io/writers/tsvz.py b/pyexcel_io/writers/tsvz.py deleted file mode 100644 index b12814d..0000000 --- a/pyexcel_io/writers/tsvz.py +++ /dev/null @@ -1,27 +0,0 @@ -""" - pyexcel_io.fileformat.tsvz - ~~~~~~~~~~~~~~~~~~~~~~~~~~ - - The lower level tsvz file format handler. 
- - :copyright: (c) 2014-2017 by Onni Software Ltd. - :license: New BSD License, see LICENSE for more details -""" -from pyexcel_io.constants import FILE_FORMAT_TSVZ, KEYWORD_TSV_DIALECT - -from .csvz import CSVZipBookWriter - - -class TSVZipBookWriter(CSVZipBookWriter): - """ write zipped tsv file - - It is similiar to CSVZipBookWriter, but support tab separated values - """ - - def __init__(self): - CSVZipBookWriter.__init__(self) - self._file_type = FILE_FORMAT_TSVZ - - def open(self, file_name, **keywords): - keywords["dialect"] = KEYWORD_TSV_DIALECT - CSVZipBookWriter.open(self, file_name, **keywords) diff --git a/requirements.txt b/requirements.txt index dac001a..8539e81 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1 @@ -ordereddict;python_version<"2.7" lml>=0.0.4 diff --git a/rnd_requirements.txt b/rnd_requirements.txt index 5d515e1..8b13789 100644 --- a/rnd_requirements.txt +++ b/rnd_requirements.txt @@ -1,5 +1 @@ -. -https://github.com/chfw/lml/archive/master.zip -https://github.com/pyexcel/pyexcel/archive/master.zip -https://github.com/pyexcel/pyexcel-xls/archive/master.zip diff --git a/setup.py b/setup.py index 6d1e8bc..90e27da 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,9 @@ #!/usr/bin/env python3 -# Template by pypi-mobans +""" +Template by pypi-mobans +""" + import os import sys import codecs @@ -22,84 +25,86 @@ PY33 = sys.version_info < (3, 4) try: lc = locale.getlocale() pf = platform.system() - if pf != 'Windows' and lc == (None, None): - locale.setlocale(locale.LC_ALL, 'C.UTF-8') + if pf != "Windows" and lc == (None, None): + locale.setlocale(locale.LC_ALL, "C.UTF-8") except (ValueError, UnicodeError, locale.Error): - locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') + locale.setlocale(locale.LC_ALL, "en_US.UTF-8") -NAME = 'pyexcel-io' -AUTHOR = 'C.W.' -VERSION = '0.5.13' -EMAIL = 'wangc_2011@hotmail.com' -LICENSE = 'New BSD' +NAME = "pyexcel-io" +AUTHOR = "C.W." 
+VERSION = "0.6.6" +EMAIL = "info@pyexcel.org" +LICENSE = "New BSD" DESCRIPTION = ( - 'A python library to read and write structured data in csv, zipped csv' + - 'format and to/from databases' + "A python library to read and write structured data in csv, zipped csv" + + "format and to/from databases" ) -URL = 'https://github.com/pyexcel/pyexcel-io' -DOWNLOAD_URL = '%s/archive/0.5.13.tar.gz' % URL -FILES = ['README.rst', 'CHANGELOG.rst'] +URL = "https://github.com/pyexcel/pyexcel-io" +DOWNLOAD_URL = "%s/archive/0.6.6.tar.gz" % URL +FILES = ["README.rst", "CHANGELOG.rst"] KEYWORDS = [ - 'python', - 'API', - 'tsv', - 'tsvz', - 'csv', - 'csvz', - 'django', - 'sqlalchemy', + "python", + "API", + "tsv", + "tsvz", + "csv", + "csvz", + "django", + "sqlalchemy", ] CLASSIFIERS = [ - 'Topic :: Software Development :: Libraries', - 'Programming Language :: Python', - 'Intended Audience :: Developers', - 'Programming Language :: Python :: 2.6', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', + "Topic :: Software Development :: Libraries", + "Programming Language :: Python", + "Intended Audience :: Developers", + + "Programming Language :: Python :: 3 :: Only", + + + + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + 'License :: OSI Approved :: BSD License', 'Programming Language :: Python :: Implementation :: PyPy' ] +PYTHON_REQUIRES = ">=3.6" + INSTALL_REQUIRES = [ - 'lml>=0.0.4', + "lml>=0.0.4", ] SETUP_COMMANDS = {} -if PY26: - INSTALL_REQUIRES.append('ordereddict') - -PACKAGES = find_packages(exclude=['ez_setup', 'examples', 'tests']) +PACKAGES = find_packages(exclude=["ez_setup", "examples", "tests", "tests.*"]) EXTRAS_REQUIRE = { - 'xls': ['pyexcel-xls>=0.5.0'], - 'xlsx': ['pyexcel-xlsx>=0.5.0'], - 'ods': ['pyexcel-ods3>=0.5.0'], 
+ "xls": ['pyexcel-xls>=0.6.0'], + "xlsx": ['pyexcel-xlsx>=0.6.0'], + "ods": ['pyexcel-ods3>=0.6.0'], } # You do not need to read beyond this line -PUBLISH_COMMAND = '{0} setup.py sdist bdist_wheel upload -r pypi'.format( - sys.executable) -GS_COMMAND = ('gs pyexcel-io v0.5.13 ' + - "Find 0.5.13 in changelog for more details") -NO_GS_MESSAGE = ('Automatic github release is disabled. ' + - 'Please install gease to enable it.') +PUBLISH_COMMAND = "{0} setup.py sdist bdist_wheel upload -r pypi".format(sys.executable) +HERE = os.path.abspath(os.path.dirname(__file__)) + +GS_COMMAND = ("gease pyexcel-io v0.6.6 " + + "Find 0.6.6 in changelog for more details") +NO_GS_MESSAGE = ("Automatic github release is disabled. " + + "Please install gease to enable it.") UPLOAD_FAILED_MSG = ( 'Upload failed. please run "%s" yourself.' % PUBLISH_COMMAND) -HERE = os.path.abspath(os.path.dirname(__file__)) class PublishCommand(Command): """Support setup.py upload.""" - description = 'Build and publish the package on github and pypi' + description = "Build and publish the package on github and pypi" user_options = [] @staticmethod def status(s): """Prints things in bold.""" - print('\033[1m{0}\033[0m'.format(s)) + print("\033[1m{0}\033[0m".format(s)) def initialize_options(self): pass @@ -109,14 +114,14 @@ class PublishCommand(Command): def run(self): try: - self.status('Removing previous builds...') - rmtree(os.path.join(HERE, 'dist')) - rmtree(os.path.join(HERE, 'build')) - rmtree(os.path.join(HERE, 'pyexcel_io.egg-info')) + self.status("Removing previous builds...") + rmtree(os.path.join(HERE, "dist")) + rmtree(os.path.join(HERE, "build")) + rmtree(os.path.join(HERE, "pyexcel_io.egg-info")) except OSError: pass - self.status('Building Source and Wheel (universal) distribution...') + self.status("Building Source and Wheel (universal) distribution...") run_status = True if has_gease(): run_status = os.system(GS_COMMAND) == 0 @@ -124,16 +129,15 @@ class PublishCommand(Command): 
self.status(NO_GS_MESSAGE) if run_status: if os.system(PUBLISH_COMMAND) != 0: - self.status(UPLOAD_FAILED_MSG % PUBLISH_COMMAND) + self.status(UPLOAD_FAILED_MSG) sys.exit() SETUP_COMMANDS.update({ - 'publish': PublishCommand + "publish": PublishCommand }) - def has_gease(): """ test if github release command is installed @@ -159,7 +163,7 @@ def read_files(*files): def read(afile): """Read a file into setup""" the_relative_file = os.path.join(HERE, afile) - with codecs.open(the_relative_file, 'r', 'utf-8') as opened_file: + with codecs.open(the_relative_file, "r", "utf-8") as opened_file: content = filter_out_test_code(opened_file) content = "".join(list(content)) return content @@ -168,11 +172,11 @@ def read(afile): def filter_out_test_code(file_handle): found_test_code = False for line in file_handle.readlines(): - if line.startswith('.. testcode:'): + if line.startswith(".. testcode:"): found_test_code = True continue if found_test_code is True: - if line.startswith(' '): + if line.startswith(" "): continue else: empty_line = line.strip() @@ -182,14 +186,14 @@ def filter_out_test_code(file_handle): found_test_code = False yield line else: - for keyword in ['|version|', '|today|']: + for keyword in ["|version|", "|today|"]: if keyword in line: break else: yield line -if __name__ == '__main__': +if __name__ == "__main__": setup( test_suite="tests", name=NAME, @@ -202,8 +206,9 @@ if __name__ == '__main__': long_description=read_files(*FILES), license=LICENSE, keywords=KEYWORDS, + python_requires=PYTHON_REQUIRES, extras_require=EXTRAS_REQUIRE, - tests_require=['nose'], + tests_require=["nose"], install_requires=INSTALL_REQUIRES, packages=PACKAGES, include_package_data=True, diff --git a/test.bat b/test.bat index b083787..9e24a8c 100644 --- a/test.bat +++ b/test.bat @@ -1,2 +1,2 @@ pip freeze -nosetests --with-coverage --cover-package pyexcel_io --cover-package tests tests --with-doctest --doctest-extension=.rst README.rst docs/source pyexcel_io && flake8 . 
--exclude=.moban.d,docs --builtins=unicode,xrange,long +nosetests --with-coverage --cover-package pyexcel_io --cover-package tests tests --with-doctest --doctest-extension=.rst README.rst docs/source pyexcel_io diff --git a/test.sh b/test.sh index b083787..7526ea1 100644 --- a/test.sh +++ b/test.sh @@ -1,2 +1,3 @@ +#/bin/bash pip freeze -nosetests --with-coverage --cover-package pyexcel_io --cover-package tests tests --with-doctest --doctest-extension=.rst README.rst docs/source pyexcel_io && flake8 . --exclude=.moban.d,docs --builtins=unicode,xrange,long +nosetests --with-coverage --cover-package pyexcel_io --cover-package tests tests --with-doctest --doctest-extension=.rst README.rst docs/source pyexcel_io diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..4f03500 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +# needed for Python 2.7, python setup.py test to discover tests directory diff --git a/tests/requirements.txt b/tests/requirements.txt index 796e17f..be90c0a 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -3,6 +3,13 @@ mock;python_version<"3" codecov coverage flake8 +black +isort +collective.checkdocs +pygments +moban +moban_jinja2_github +pyexcel +pyexcel-xls==0.5.9 SQLAlchemy -pyexcel>=0.2.0 -pyexcel-xls>=0.1.0 +pyexcel-xlsxw diff --git a/tests/test_base.py b/tests/test_base.py index 351e7f0..df4dfb1 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -1,6 +1,7 @@ -from pyexcel_io.sheet import SheetReader, SheetWriter, NamedContent from pyexcel_io.book import BookWriter +from pyexcel_io.sheet import SheetReader, SheetWriter, NamedContent from pyexcel_io.utils import is_empty_array + from nose.tools import raises diff --git a/tests/test_book.py b/tests/test_book.py index 226c6ca..bb6b20e 100644 --- a/tests/test_book.py +++ b/tests/test_book.py @@ -1,5 +1,12 @@ +from pyexcel_io.book import ( + BookReader, + BookWriter, + RWInterface, + _convert_content_to_stream, +) +from 
pyexcel_io._compact import BytesIO, StringIO + from nose.tools import raises -from pyexcel_io.book import RWInterface, BookReader, BookWriter @raises(NotImplementedError) @@ -30,3 +37,15 @@ def test_book_reader_open_stream(): def test_book_writer(): writer = BookWriter() writer.open_stream("a string") + + +def test_convert_to_bytes_stream(): + file_content = b"test" + stream = _convert_content_to_stream(file_content, "csv") + assert isinstance(stream, StringIO) + + +def test_convert_to_string_stream(): + file_content = "test" + stream = _convert_content_to_stream(file_content, "csvz") + assert isinstance(stream, BytesIO) diff --git a/tests/test_csv_book.py b/tests/test_csv_book.py index 0c55869..53730de 100644 --- a/tests/test_csv_book.py +++ b/tests/test_csv_book.py @@ -1,19 +1,20 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - import os -from unittest import TestCase from textwrap import dedent -from nose.tools import raises, eq_ +from unittest import TestCase + import pyexcel_io.manager as manager +from pyexcel_io import get_data from pyexcel_io.sheet import NamedContent -from pyexcel_io.readers.csvr import ( - CSVSheetReader, +from pyexcel_io.reader import EncapsulatedSheetReader +from pyexcel_io._compact import BytesIO, StringIO +from pyexcel_io.readers.csv_sheet import ( CSVFileReader, + CSVSheetReader, CSVinMemoryReader, ) -from pyexcel_io.writers.csvw import CSVFileWriter, CSVMemoryWriter -from pyexcel_io._compact import BytesIO, PY2, StringIO +from pyexcel_io.writers.csv_sheet import CSVFileWriter, CSVMemoryWriter + +from nose.tools import eq_, raises class TestReaders(TestCase): @@ -32,7 +33,9 @@ class TestReaders(TestCase): sheet.get_file_handle() def test_sheet_file_reader(self): - r = CSVFileReader(NamedContent(self.file_type, self.test_file)) + r = EncapsulatedSheetReader( + CSVFileReader(NamedContent(self.file_type, self.test_file)) + ) result = list(r.to_array()) self.assertEqual(result, self.expected_data) @@ -41,7 +44,10 @@ class 
TestReaders(TestCase): with open(self.test_file, "r") as f: io.write(f.read()) io.seek(0) - r = CSVinMemoryReader(NamedContent(self.file_type, io)) + r = EncapsulatedSheetReader( + CSVinMemoryReader(NamedContent(self.file_type, io)) + ) + result = list(r.to_array()) self.assertEqual(result, self.expected_data) @@ -108,20 +114,37 @@ class TestNonUniformCSV(TestCase): f.write(",".join(row) + "\n") def test_sheet_file_reader(self): - r = CSVFileReader(NamedContent(self.file_type, self.test_file)) + r = EncapsulatedSheetReader( + CSVFileReader(NamedContent(self.file_type, self.test_file)) + ) result = list(r.to_array()) self.assertEqual(result, [[1], [4, 5, 6], ["", 7]]) + def test_sheet_file_reader_with_trailing_empty_cells(self): + r = EncapsulatedSheetReader( + CSVFileReader(NamedContent(self.file_type, self.test_file)), + keep_trailing_empty_cells=True, + ) + result = list(r.to_array()) + self.assertEqual(result, [[1], [4, 5, 6, "", ""], ["", 7]]) + + def test_get_data_with_trailing_empty_cells(self): + result = get_data(self.test_file, keep_trailing_empty_cells=True) + self.assertEqual( + result[self.test_file], [[1], [4, 5, 6, "", ""], ["", 7]] + ) + def tearDown(self): os.unlink(self.test_file) def test_utf16_decoding(): test_file = os.path.join("tests", "fixtures", "csv-encoding-utf16.csv") - reader = CSVFileReader(NamedContent("csv", test_file), encoding="utf-16") + reader = EncapsulatedSheetReader( + CSVFileReader(NamedContent("csv", test_file), encoding="utf-16") + ) + content = list(reader.to_array()) - if PY2: - content[0] = [s.encode("utf-8") for s in content[0]] expected = [["Äkkilähdöt", "Matkakirjoituksia", "Matkatoimistot"]] eq_(content, expected) @@ -136,8 +159,6 @@ def test_utf16_encoding(): writer.close() with open(test_file, "rb") as f: actual = f.read().decode("utf-16") - if PY2: - actual = actual.encode("utf-8") eq_(actual, "Äkkilähdöt,Matkakirjoituksia,Matkatoimistot\n") os.unlink(test_file) @@ -145,12 +166,11 @@ def test_utf16_encoding(): def 
test_utf16_memory_decoding(): test_content = u"Äkkilähdöt,Matkakirjoituksia,Matkatoimistot" test_content = BytesIO(test_content.encode("utf-16")) - reader = CSVinMemoryReader( - NamedContent("csv", test_content), encoding="utf-16" + reader = EncapsulatedSheetReader( + CSVinMemoryReader(NamedContent("csv", test_content), encoding="utf-16") ) + content = list(reader.to_array()) - if PY2: - content[0] = [s.encode("utf-8") for s in content[0]] expected = [["Äkkilähdöt", "Matkakirjoituksia", "Matkatoimistot"]] eq_(content, expected) @@ -167,6 +187,4 @@ def test_utf16_memory_encoding(): ) writer.write_array(content) actual = io.getvalue() - if PY2: - actual = actual.decode("utf-16") eq_(actual, u"Äkkilähdöt,Matkakirjoituksia,Matkatoimistot\n") diff --git a/tests/test_django_book.py b/tests/test_django_book.py index 17fca83..5ccdd01 100644 --- a/tests/test_django_book.py +++ b/tests/test_django_book.py @@ -1,21 +1,23 @@ -from nose.tools import raises, eq_ from pyexcel_io import save_data +from pyexcel_io.reader import EncapsulatedSheetReader from pyexcel_io._compact import OrderedDict from pyexcel_io.constants import DB_DJANGO from pyexcel_io.database.common import ( - DjangoModelImporter, - DjangoModelImportAdapter, DjangoModelExporter, + DjangoModelImporter, DjangoModelExportAdapter, -) -from pyexcel_io.database.importers.django import ( - DjangoModelWriter, - DjangoBookWriter, + DjangoModelImportAdapter, ) from pyexcel_io.database.exporters.django import ( - DjangoModelReader, DjangoBookReader, + DjangoModelReader, ) +from pyexcel_io.database.importers.django import ( + DjangoBookWriter, + DjangoModelWriter, +) + +from nose.tools import eq_, raises class Package: @@ -156,10 +158,13 @@ class TestSheet: writer = DjangoModelWriter(None, adapter) writer.write_array(self.data[1:]) writer.close() - assert model.objects.objs == [ - {"Y": 2, "X": 2, "Z": 3}, - {"Y": 5, "X": 5, "Z": 6}, - ] + eq_( + model.objects.objs, + [ + {"Y": 2, "X": 2, "Z": 3}, + {"Y": 5, "X": 5, "Z": 6}, 
+ ], + ) def test_sheet_save_to_django_model_skip_me(self): model = FakeDjangoModel() @@ -176,7 +181,7 @@ class TestSheet: writer = DjangoModelWriter(None, adapter) writer.write_array(self.data[1:]) writer.close() - assert model.objects.objs == [{"Y": 2, "X": 1, "Z": 3}] + eq_(model.objects.objs, [{"Y": 2, "X": 1, "Z": 3}]) def test_load_sheet_from_django_model(self): model = FakeDjangoModel() @@ -209,7 +214,9 @@ class TestSheet: return [str(element) for element in row] # the key point of this test case - reader = DjangoModelReader(model, row_renderer=row_renderer) + reader = EncapsulatedSheetReader( + DjangoModelReader(model), row_renderer=row_renderer + ) data = reader.to_array() expected = [["X", "Y", "Z"], ["1", "2", "3"], ["4", "5", "6"]] eq_(list(data), expected) @@ -237,6 +244,18 @@ class TestSheet: writer.close() eq_(model.objects.objs, self.result) + def test_jumping_columns(self): + data2 = [["D", "A", "B", "C"], [1, 1, 2, 3], [10, 4, 5, 6]] + mapdict = {"C": "Z", "A": "X", "B": "Y"} + model = FakeDjangoModel() + adapter = DjangoModelImportAdapter(model) + adapter.column_names = data2[0] + adapter.column_name_mapping_dict = mapdict + writer = DjangoModelWriter(None, adapter) + writer.write_array(data2[1:]) + writer.close() + eq_(model.objects.objs, self.result) + def test_empty_model(self): model = FakeDjangoModel() reader = DjangoModelReader(model) @@ -279,8 +298,7 @@ class TestMultipleModels: adapter1.get_name(): self.content["Sheet1"][1:], adapter2.get_name(): self.content["Sheet2"][1:], } - writer = DjangoBookWriter() - writer.open_content(importer, batch_size=sample_size) + writer = DjangoBookWriter(importer, "django", batch_size=sample_size) writer.write(to_store) writer.close() assert model1.objects.objs == self.result1 @@ -302,8 +320,9 @@ class TestMultipleModels: adapter1.get_name(): self.content["Sheet1"][1:], adapter2.get_name(): self.content["Sheet2"][1:], } - writer = DjangoBookWriter() - writer.open_content(importer, batch_size=sample_size, 
bulk_save=False) + writer = DjangoBookWriter( + importer, "django", batch_size=sample_size, bulk_save=False + ) writer.write(to_store) writer.close() assert model1.objects.objs == [] @@ -334,12 +353,11 @@ class TestMultipleModels: adapter2 = DjangoModelExportAdapter(model2) exporter.append(adapter1) exporter.append(adapter2) - reader = DjangoBookReader() - reader.open_content(exporter) - data = reader.read_all() - for key in data.keys(): - data[key] = list(data[key]) - assert data == self.content + reader = DjangoBookReader(exporter, "django") + result = read_all(reader) + for key in result: + result[key] = list(result[key]) + eq_(result, self.content) @raises(Exception) def test_special_case_where_only_one_model_used(self): @@ -353,28 +371,6 @@ class TestMultipleModels: "Sheet2": self.content["Sheet2"][1:], } save_data(importer, to_store, file_type=DB_DJANGO) - assert model1.objects.objs == self.result1 - model1._meta.model_name = "Sheet1" - model1._meta.update(["X", "Y", "Z"]) - exporter = DjangoModelExporter() - adapter = DjangoModelExportAdapter(model1) - exporter.append(adapter) - reader = DjangoBookReader() - reader.open_content(exporter) - data = reader.read_all() - assert list(data["Sheet1"]) == self.content["Sheet1"] - - -@raises(TypeError) -def test_not_implemented_method(): - reader = DjangoBookReader() - reader.open("afile") - - -@raises(TypeError) -def test_not_implemented_method_2(): - reader = DjangoBookReader() - reader.open_stream("afile") class TestFilter: @@ -393,25 +389,33 @@ class TestFilter: self.model._meta.update(["X", "Y", "Z"]) def test_load_sheet_from_django_model_with_filter(self): - reader = DjangoModelReader(self.model, start_row=0, row_limit=2) + reader = EncapsulatedSheetReader( + DjangoModelReader(self.model), start_row=0, row_limit=2 + ) data = reader.to_array() expected = [["X", "Y", "Z"], [1, 2, 3]] eq_(list(data), expected) def test_load_sheet_from_django_model_with_filter_1(self): - reader = DjangoModelReader(self.model, 
start_row=1, row_limit=3) + reader = EncapsulatedSheetReader( + DjangoModelReader(self.model), start_row=1, row_limit=3 + ) data = reader.to_array() expected = [[1, 2, 3], [4, 5, 6]] eq_(list(data), expected) def test_load_sheet_from_django_model_with_filter_2(self): - reader = DjangoModelReader(self.model, start_column=1) + reader = EncapsulatedSheetReader( + DjangoModelReader(self.model), start_column=1 + ) data = reader.to_array() expected = [["Y", "Z"], [2, 3], [5, 6]] eq_(list(data), expected) def test_load_sheet_from_django_model_with_filter_3(self): - reader = DjangoModelReader(self.model, start_column=1, column_limit=1) + reader = EncapsulatedSheetReader( + DjangoModelReader(self.model), start_column=1, column_limit=1 + ) data = reader.to_array() expected = [["Y"], [2], [5]] eq_(list(data), expected) @@ -422,3 +426,10 @@ def test_django_model_import_adapter(): adapter.column_names = ["a"] adapter.row_initializer = "abc" eq_(adapter.row_initializer, "abc") + + +def read_all(reader): + result = OrderedDict() + for index, sheet in enumerate(reader.content_array): + result.update({sheet.name: reader.read_sheet(index).to_array()}) + return result diff --git a/tests/test_filter.py b/tests/test_filter.py index fd7bcf8..a3d3591 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -1,9 +1,10 @@ import os +import pyexcel_io.constants as constants from pyexcel_io import get_data, save_data from pyexcel_io.utils import _index_filter + from nose.tools import eq_ -import pyexcel_io.constants as constants def test_index_filter(): diff --git a/tests/test_io.py b/tests/test_io.py index 8670c18..cf14ca4 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1,18 +1,20 @@ import os -import sys import types +from zipfile import BadZipfile from unittest import TestCase + import pyexcel_io.manager as manager import pyexcel_io.exceptions as exceptions -from pyexcel_io._compact import StringIO, BytesIO, is_string -from pyexcel_io._compact import OrderedDict -from 
pyexcel_io import save_data, get_data, iget_data +from pyexcel_io import get_data, iget_data, save_data from pyexcel_io.io import load_data, get_writer -from nose.tools import raises, eq_ -from zipfile import BadZipfile +from pyexcel_io._compact import BytesIO, StringIO, OrderedDict, is_string + +from nose.tools import eq_, raises -PY2 = sys.version_info[0] == 2 +@raises(IOError) +def test_directory_name_as_file(): + get_data("/") def test_force_file_type(): @@ -24,6 +26,20 @@ def test_force_file_type(): eq_(expected, data[test_file]) +def test_force_file_type_on_write(): + test_file = "force_file_type_on_write.txt" + save_data(test_file, {"sheet 1": [[1, 2]]}, force_file_type="csv") + data = get_data(test_file, force_file_type="csv") + expected = [[1, 2]] + eq_(expected, data[test_file]) + os.unlink(test_file) + + +@raises(IOError) +def test_invalid_file(): + load_data("/something/does/not/exist") + + @raises(IOError) def test_no_valid_parameters(): load_data() @@ -49,14 +65,14 @@ def test_wrong_parameter_to_get_writer(): get_writer(1) -@raises(Exception) -def test_wrong_parameter_to_get_writer2(): - get_writer(1, file_type="csv") +# @raises(Exception) +# def test_wrong_parameter_to_get_writer2(): +# get_writer(1, file_type="csv") def test_load_ods_data(): msg = "Please install one of these plugins for read data in 'ods': " - msg += "pyexcel-ods,pyexcel-ods3" + msg += "pyexcel-ods,pyexcel-ods3,pyexcel-odsr" try: get_data("test.ods") except exceptions.SupportingPluginAvailableButNotInstalled as e: @@ -66,7 +82,7 @@ def test_load_ods_data(): def test_load_ods_data_from_memory(): io = BytesIO() msg = "Please install one of these plugins for read data in 'ods': " - msg += "pyexcel-ods,pyexcel-ods3" + msg += "pyexcel-ods,pyexcel-ods3,pyexcel-odsr" try: get_data(io, file_type="ods") except exceptions.SupportingPluginAvailableButNotInstalled as e: @@ -84,7 +100,7 @@ def test_write_xlsx_data_to_memory(): eq_(str(e), msg) -@raises(exceptions.NoSupportingPluginFound) 
+@raises(IOError) def test_load_unknown_data(): get_data("test.unknown") @@ -97,11 +113,8 @@ def test_load_unknown_data_from_memory(): @raises(BadZipfile) def test_load_csvz_data_from_memory(): - if not PY2: - io = StringIO() - get_data(io, file_type="csvz") - else: - raise BadZipfile("pass it") + io = StringIO() + get_data(io, file_type="csvz") @raises(IOError) @@ -109,19 +122,11 @@ def test_write_xlsx_data(): get_data("test.xlsx") -@raises(exceptions.NoSupportingPluginFound) -def test_write_unknown_data(): - get_data("test.unknown") - - @raises(Exception) def test_writer_csvz_data_from_memory(): - if not PY2: - io = StringIO() - writer = get_writer(io, file_type="csvz") - writer.write({"adb": [[2, 3]]}) - else: - raise Exception("pass it") + io = StringIO() + writer = get_writer(io, file_type="csvz") + writer.write({"adb": [[2, 3]]}) @raises(exceptions.NoSupportingPluginFound) @@ -182,6 +187,7 @@ def test_file_handle_as_input(): with open(test_file, "r") as f: data = get_data(f, "csv") eq_(data["csv"], [[1, 2, 3]]) + os.unlink("file_handle.csv") def test_file_type_case_insensitivity(): @@ -192,6 +198,7 @@ def test_file_type_case_insensitivity(): with open(test_file, "r") as f: data = get_data(f, "csv") eq_(data["csv"], [[1, 2, 3]]) + os.unlink("file_handle.CSv") def test_file_handle_as_output(): @@ -202,6 +209,7 @@ def test_file_handle_as_output(): with open(test_file, "r") as f: content = f.read() eq_(content, "1,2,3\n") + os.unlink("file_handle.csv") def test_binary_file_content(): @@ -247,10 +255,7 @@ def test_conversion_from_bytes_to_text(): def test_is_string(): - if PY2: - assert is_string(type(u"a")) is True - else: - assert is_string(type("a")) is True + assert is_string(type("a")) is True def test_generator_is_obtained(): diff --git a/tests/test_issues.py b/tests/test_issues.py index 81704f8..6486c6a 100644 --- a/tests/test_issues.py +++ b/tests/test_issues.py @@ -1,12 +1,11 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - import os -from nose import 
SkipTest -from nose.tools import eq_ -from pyexcel_io import get_data, save_data -from pyexcel_io._compact import PY26 + import pyexcel as p +from pyexcel_io import get_data, save_data +from pyexcel_io.exceptions import NoSupportingPluginFound + +from nose import SkipTest +from nose.tools import eq_, raises IN_TRAVIS = "TRAVIS" in os.environ @@ -40,35 +39,19 @@ def test_issue_23(): eq_(data["issue23.csv"], expected) -# def test_issue_28(): -# from pyexcel_io.plugins import readers -# from pyexcel_io.exceptions import UpgradePlugin -# expected = "Please upgrade the plugin '%s' according to " -# expected += "plugin compactibility table." -# try: -# readers.load_me_later('pyexcel_test') -# except UpgradePlugin as e: -# eq_(str(e), expected % 'pyexcel_test') - - def test_issue_33_34(): - if PY26: - pass - else: - import mmap + import mmap - test_file = get_fixture("issue20.csv") - with open(test_file, "r+b") as f: - memory_mapped_file = mmap.mmap( - f.fileno(), 0, access=mmap.ACCESS_READ - ) - data = get_data(memory_mapped_file, file_type="csv") - expected = [[u"to", u"infinity", u"and", u"beyond"]] - eq_(data["csv"], expected) + test_file = get_fixture("issue20.csv") + with open(test_file, "r+b") as f: + memory_mapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) + data = get_data(memory_mapped_file, file_type="csv") + expected = [[u"to", u"infinity", u"and", u"beyond"]] + eq_(data["csv"], expected) def test_issue_30_utf8_BOM_header(): - content = [[u"人有悲歡離合", u"月有陰晴圓缺"]] + content = [["人有悲歡離合", "月有陰晴圓缺"]] test_file = "test-utf8-BOM.csv" save_data(test_file, content, encoding="utf-8-sig", lineterminator="\n") custom_encoded_content = get_data(test_file, encoding="utf-8-sig") @@ -80,52 +63,31 @@ def test_issue_30_utf8_BOM_header(): def test_issue_33_34_utf32_encoded_file(): - if PY26: - pass - else: - check_mmap_encoding("utf-32") + check_mmap_encoding("utf-32") def test_issue_33_34_utf32be_encoded_file(): - if PY26: - pass - else: - 
check_mmap_encoding("utf-32-be") + check_mmap_encoding("utf-32-be") def test_issue_33_34_utf32le_encoded_file(): - if PY26: - pass - else: - check_mmap_encoding("utf-32-le") + check_mmap_encoding("utf-32-le") def test_issue_33_34_utf16_encoded_file(): - if PY26: - pass - else: - check_mmap_encoding("utf-16") + check_mmap_encoding("utf-16") def test_issue_33_34_utf16be_encoded_file(): - if PY26: - pass - else: - check_mmap_encoding("utf-16-be") + check_mmap_encoding("utf-16-be") def test_issue_33_34_utf16le_encoded_file(): - if PY26: - pass - else: - check_mmap_encoding("utf-16-le") + check_mmap_encoding("utf-16-le") def test_issue_33_34_utf8_encoded_file(): - if PY26: - pass - else: - check_mmap_encoding("utf-8") + check_mmap_encoding("utf-8") def check_mmap_encoding(encoding): @@ -141,6 +103,7 @@ def check_mmap_encoding(encoding): memory_mapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) data = get_data(memory_mapped_file, file_type="csv", encoding=encoding) eq_(data["csv"], content) + memory_mapped_file.close() os.unlink(test_file) @@ -179,5 +142,10 @@ def test_pyexcel_issue_138(): os.unlink("test.csv") +@raises(NoSupportingPluginFound) +def test_issue_96(): + get_data("foo-bar-data", file_type="Idonotexist") + + def get_fixture(file_name): return os.path.join("tests", "fixtures", file_name) diff --git a/tests/test_new_csv_book.py b/tests/test_new_csv_book.py index e9fa908..7655d40 100644 --- a/tests/test_new_csv_book.py +++ b/tests/test_new_csv_book.py @@ -1,20 +1,22 @@ import os -from unittest import TestCase from textwrap import dedent -from nose.tools import raises +from unittest import TestCase + import pyexcel_io.manager as manager +from pyexcel_io.reader import Reader +from pyexcel_io.writer import Writer from pyexcel_io._compact import OrderedDict -from pyexcel_io.readers.csvr import CSVBookReader -from pyexcel_io.readers.tsv import TSVBookReader -from pyexcel_io.writers.csvw import CSVBookWriter -from pyexcel_io.writers.tsv import 
TSVBookWriter + +from nose.tools import raises class TestCSVReaders(TestCase): file_type = "csv" - reader_class = CSVBookReader delimiter = "," + def reader_class(self): + return Reader(self.file_type) + def setUp(self): self.test_file = "csv_book." + self.file_type self.data = [["1", "2", "3"], ["4", "5", "6"], ["7", "8", "9"]] @@ -43,17 +45,50 @@ class TestCSVReaders(TestCase): os.unlink(self.test_file) -class TestTSVReaders(TestCSVReaders): +class TestNewCSVReaders(TestCase): + file_type = "csv" + delimiter = "," + + def setUp(self): + self.test_file = "csv_book." + self.file_type + self.data = [["1", "2", "3"], ["4", "5", "6"], ["7", "8", "9"]] + self.expected_data = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] + with open(self.test_file, "w") as f: + for row in self.data: + f.write(self.delimiter.join(row) + "\n") + + def test_book_reader(self): + b = Reader(self.file_type) + b.open(self.test_file) + sheets = b.read_all() + self.assertEqual(list(sheets[self.test_file]), self.expected_data) + + def test_book_reader_from_memory_source(self): + io = manager.get_io(self.file_type) + with open(self.test_file, "r") as f: + io.write(f.read()) + io.seek(0) + b = Reader(self.file_type) + b.open_stream(io) + sheets = b.read_all() + self.assertEqual(list(sheets[self.file_type]), self.expected_data) + + def tearDown(self): + os.unlink(self.test_file) + + +class TestTSVReaders(TestNewCSVReaders): file_type = "tsv" - reader_class = TSVBookReader delimiter = "\t" class TestReadMultipleSheets(TestCase): file_type = "csv" - reader_class = CSVBookReader delimiter = "," + def reader_class(self): + return Reader(self.file_type) + def setUp(self): self.test_file_formatter = "csv_multiple__%s__%s." + self.file_type self.merged_book_file = "csv_multiple." 
+ self.file_type @@ -121,14 +156,12 @@ class TestReadMultipleSheets(TestCase): class TestTSVBookReaders(TestReadMultipleSheets): file_type = "tsv" - reader_class = TSVBookReader delimiter = "\t" class TestWriteMultipleSheets(TestCase): file_type = "csv" - writer_class = CSVBookWriter - reader_class = CSVBookReader + result1 = dedent( """ 1,2,3 @@ -170,6 +203,12 @@ class TestWriteMultipleSheets(TestCase): """ ) + def writer_class(self): + return Writer(self.file_type) + + def reader_class(self): + return Reader(self.file_type) + def setUp(self): self.test_file_formatter = "csv_multiple__%s__%s." + self.file_type self.merged_book_file = "csv_multiple." + self.file_type @@ -209,7 +248,7 @@ class TestWriteMultipleSheets(TestCase): def test_multiple_sheet_into_memory(self): io = manager.get_io(self.file_type) w = self.writer_class() - w.open(io, lineterminator="\n") + w.open_stream(io, lineterminator="\n") w.write(self.sheets) w.close() content = io.getvalue() @@ -219,7 +258,7 @@ class TestWriteMultipleSheets(TestCase): """Write csv book into a single stream""" io = manager.get_io(self.file_type) w = self.writer_class() - w.open(io, lineterminator="\n") + w.open_stream(io, lineterminator="\n") w.write(self.sheets) w.close() reader = self.reader_class() @@ -239,8 +278,7 @@ class TestWriteMultipleSheets(TestCase): class TestTSVWriteMultipleSheets(TestWriteMultipleSheets): file_type = "tsv" - writer_class = TSVBookWriter - reader_class = TSVBookReader + result1 = dedent( """ 1\t2\t3 @@ -285,7 +323,7 @@ class TestTSVWriteMultipleSheets(TestWriteMultipleSheets): class TestWriter(TestCase): file_type = "csv" - writer_class = CSVBookWriter + result = dedent( """ 1,2,3 @@ -294,6 +332,9 @@ class TestWriter(TestCase): """ ).strip("\n") + def writer_class(self): + return Writer(self.file_type) + def setUp(self): self.test_file = "csv_book." 
+ self.file_type self.data = [["1", "2", "3"], ["4", "5", "6"], ["7", "8", "9"]] @@ -313,7 +354,7 @@ class TestWriter(TestCase): class TestTSVWriters(TestWriter): file_type = "tsv" - writer_class = TSVBookWriter + result = dedent( """ 1\t2\t3 @@ -325,7 +366,7 @@ class TestTSVWriters(TestWriter): class TestMemoryWriter(TestCase): file_type = "csv" - writer_class = CSVBookWriter + result = dedent( """ 1,2,3 @@ -334,6 +375,9 @@ class TestMemoryWriter(TestCase): """ ).strip("\n") + def writer_class(self): + return Writer(self.file_type) + def setUp(self): self.test_file = "csv_book." + self.file_type self.data = [["1", "2", "3"], ["4", "5", "6"], ["7", "8", "9"]] @@ -341,7 +385,7 @@ class TestMemoryWriter(TestCase): def test_book_writer_to_memroy(self): io = manager.get_io(self.file_type) w = self.writer_class() - w.open(io, single_sheet_in_book=True) + w.open_stream(io, single_sheet_in_book=True) w.write({self.file_type: self.data}) w.close() content = io.getvalue().replace("\r", "") @@ -350,7 +394,7 @@ class TestMemoryWriter(TestCase): class TestTSVMemoryWriter(TestMemoryWriter): file_type = "tsv" - writer_class = TSVBookWriter + result = dedent( """ 1\t2\t3 diff --git a/tests/test_new_csvz_book.py b/tests/test_new_csvz_book.py index fb7892a..2d4c4ef 100644 --- a/tests/test_new_csvz_book.py +++ b/tests/test_new_csvz_book.py @@ -1,26 +1,27 @@ # -*- coding: utf-8 -*- import os -from unittest import TestCase -from pyexcel_io._compact import OrderedDict -from pyexcel_io import save_data -import pyexcel_io.manager as manager -from pyexcel_io.readers.csvz import CSVZipBookReader -from pyexcel_io.writers.csvz import CSVZipBookWriter -from pyexcel_io.readers.tsvz import TSVZipBookReader -from pyexcel_io.writers.tsvz import TSVZipBookWriter import zipfile -from nose.tools import raises -import sys +from unittest import TestCase -PY2 = sys.version_info[0] == 2 +import pyexcel_io.manager as manager +from pyexcel_io import save_data +from pyexcel_io.reader import Reader +from 
pyexcel_io.writer import Writer +from pyexcel_io._compact import OrderedDict + +from nose.tools import raises class TestCSVZ(TestCase): file_type = "csvz" - writer_class = CSVZipBookWriter - reader_class = CSVZipBookReader result = u"中,文,1,2,3" + def writer_class(self): + return Writer(self.file_type) + + def reader_class(self): + return Reader(self.file_type) + def setUp(self): self.file = "csvz." + self.file_type @@ -50,25 +51,33 @@ class TestCSVZ(TestCase): self.assertEqual(list(data["pyexcel_sheet1"]), [[u"中", u"文", 1, 2, 3]]) zipreader.close() + def test_reading_utf32(self): + zip = zipfile.ZipFile(self.file, "w") + zip.writestr("something.ext", self.result.encode("utf-32")) + zip.close() + zipreader = self.reader_class() + zipreader.open(self.file) + data = zipreader.read_all() + self.assertEqual(list(data["something"]), [[u"中", u"文", 1, 2, 3]]) + zipreader.close() + def tearDown(self): os.unlink(self.file) class TestTSVZ(TestCSVZ): file_type = "tsvz" - writer_class = TSVZipBookWriter - reader_class = TSVZipBookReader result = u"中\t文\t1\t2\t3" def test_reading_from_memory(): data = [[1, 2, 3]] io = manager.get_io("csvz") - zipbook = CSVZipBookWriter() + zipbook = Writer("csvz") zipbook.open_stream(io) zipbook.write({None: data}) zipbook.close() - zipreader = CSVZipBookReader() + zipreader = Reader("csvz") zipreader.open_stream(io) data = zipreader.read_all() assert list(data["pyexcel_sheet1"]) == [[1, 2, 3]] @@ -77,11 +86,11 @@ def test_reading_from_memory(): def test_reading_from_memory_tsvz(): data = [[1, 2, 3]] io = manager.get_io("tsvz") - zipbook = TSVZipBookWriter() + zipbook = Writer("tsvz") zipbook.open_stream(io) zipbook.write({None: data}) zipbook.close() - zipreader = TSVZipBookReader() + zipreader = Reader("tsvz") zipreader.open_stream(io) data = zipreader.read_all() assert list(data["pyexcel_sheet1"]) == [[1, 2, 3]] @@ -89,7 +98,9 @@ def test_reading_from_memory_tsvz(): class TestMultipleSheet(TestCase): file_name = "mybook.csvz" - reader_class = 
CSVZipBookReader + + def reader_class(self): + return Reader("csvz") def setUp(self): self.content = OrderedDict() @@ -138,4 +149,6 @@ class TestMultipleSheet(TestCase): class TestMultipleTSVSheet(TestMultipleSheet): file_name = "mybook.tsvz" - reader_class = TSVZipBookReader + + def reader_class(self): + return Reader("tsvz") diff --git a/tests/test_plugin_api.py b/tests/test_plugin_api.py new file mode 100644 index 0000000..1d591ad --- /dev/null +++ b/tests/test_plugin_api.py @@ -0,0 +1,61 @@ +from pyexcel_io.plugin_api import ISheet, IReader, IWriter, ISheetWriter + +from nose.tools import raises + + +class TestISheet: + def setUp(self): + self.isheet = ISheet() + + @raises(NotImplementedError) + def test_row_iterator(self): + self.isheet.row_iterator() + + @raises(NotImplementedError) + def test_column_iterator(self): + self.isheet.column_iterator(1) + + +class TestISheetWriter: + def setUp(self): + self.isheet_writer = ISheetWriter() + + @raises(NotImplementedError) + def test_write_row(self): + self.isheet_writer.write_row([1, 2]) + + @raises(NotImplementedError) + def test_close(self): + self.isheet_writer.close() + + +class TestIReader: + def setUp(self): + self.ireader = IReader() + + @raises(NotImplementedError) + def test_read_sheet(self): + self.ireader.read_sheet(1) + + @raises(NotImplementedError) + def test_close(self): + self.ireader.close() + + +class TestIWriter: + def setUp(self): + self.iwriter = IWriter() + + @raises(NotImplementedError) + def test_create_sheet(self): + self.iwriter.create_sheet("a name") + + +@raises(Exception) +def test_empty_writer(): + class TestWriter(IWriter): + def create_sheet(self, sheet_name): + return None + + test_writer = TestWriter() + test_writer.write({"sheet 1": [[1, 2]]}) diff --git a/tests/test_pyexcel_integration.py b/tests/test_pyexcel_integration.py index a13a80f..963da21 100644 --- a/tests/test_pyexcel_integration.py +++ b/tests/test_pyexcel_integration.py @@ -1,7 +1,7 @@ import os import datetime -from 
unittest import TestCase from textwrap import dedent +from unittest import TestCase import pyexcel as pe from pyexcel_io._compact import text_type diff --git a/tests/test_renderer.py b/tests/test_renderer.py index 38caf8a..209bad9 100644 --- a/tests/test_renderer.py +++ b/tests/test_renderer.py @@ -1,7 +1,9 @@ import os -from nose.tools import eq_ + from pyexcel_io import get_data, save_data +from nose.tools import eq_ + class TestRenderer: def setUp(self): diff --git a/tests/test_service.py b/tests/test_service.py index 10b486e..70f6209 100644 --- a/tests/test_service.py +++ b/tests/test_service.py @@ -1,13 +1,22 @@ -from nose.tools import eq_, raises -from pyexcel_io.service import date_value, time_value -from pyexcel_io.service import detect_int_value -from pyexcel_io.service import detect_float_value -from pyexcel_io.service import ODS_WRITE_FORMAT_COVERSION -from pyexcel_io.service import ods_float_value -from pyexcel_io.service import throw_exception -from pyexcel_io._compact import PY2 +from datetime import time, datetime, timedelta + +from pyexcel_io.service import ( + date_value, + time_value, + float_value, + boolean_value, + ods_bool_value, + ods_date_value, + ods_time_value, + ods_float_value, + throw_exception, + detect_int_value, + detect_float_value, + ods_timedelta_value, +) from pyexcel_io.exceptions import IntegerAccuracyLossError -from nose import SkipTest + +from nose.tools import eq_, raises def test_date_util_parse(): @@ -97,14 +106,6 @@ def test_detect_float_value_on_custom_nan_text2(): eq_(str(result), "nan") -def test_ods_write_format_conversion(): - if PY2: - expected = ODS_WRITE_FORMAT_COVERSION[long] # noqa: F821 - eq_('throw_exception', expected) - else: - raise SkipTest() - - @raises(IntegerAccuracyLossError) def test_big_int_value(): ods_float_value(1000000000000000) @@ -113,3 +114,57 @@ def test_big_int_value(): @raises(IntegerAccuracyLossError) def test_throw_exception(): throw_exception(1000000000000000) + + +def 
test_boolean_value(): + fixture = ["true", "false", 1] + expected = [True, False, 1] + + actual = [boolean_value(element) for element in fixture] + eq_(actual, expected) + + +def test_time_delta_presentation(): + a = datetime(2020, 12, 12, 12, 12, 12) + b = datetime(2020, 11, 12, 12, 12, 11) + delta = a - b + + value = ods_timedelta_value(delta) + eq_(value, "PT720H00M01S") + + +def test_ods_bool_to_string(): + fixture = [True, False] + expected = ["true", "false"] + + actual = [ods_bool_value(element) for element in fixture] + eq_(actual, expected) + + +def test_ods_time_value(): + test = datetime(2020, 10, 6, 11, 11, 11) + actual = ods_time_value(test) + eq_(actual, "PT11H11M11S") + + +def test_ods_date_value(): + test = datetime(2020, 10, 6, 11, 11, 11) + actual = ods_date_value(test) + eq_(actual, "2020-10-06") + + +def test_time_value_returns_time_delta(): + test_time_value = "PT720H00M01S" + delta = time_value(test_time_value) + eq_(delta, timedelta(days=30, seconds=1)) + + +def test_time_value(): + test_time_value = "PT23H00M01S" + delta = time_value(test_time_value) + eq_(delta, time(23, 0, 1)) + + +def test_float_value(): + a = float_value("1.2") + eq_(a, 1.2) diff --git a/tests/test_sheet.py b/tests/test_sheet.py index 3e3b7ac..d82cb30 100644 --- a/tests/test_sheet.py +++ b/tests/test_sheet.py @@ -1,6 +1,7 @@ -from nose.tools import eq_ -from pyexcel_io.sheet import SheetWriter, SheetReader import pyexcel_io.constants as constants +from pyexcel_io.sheet import SheetReader, SheetWriter + +from nose.tools import eq_ class MyWriter(SheetWriter): diff --git a/tests/test_sql_book.py b/tests/test_sql_book.py index 6691ae6..514d813 100644 --- a/tests/test_sql_book.py +++ b/tests/test_sql_book.py @@ -1,32 +1,41 @@ import sys import json -from sqlalchemy import create_engine -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy import Column, Integer, String -from sqlalchemy import Float, Date, DateTime, ForeignKey -from sqlalchemy.orm import 
sessionmaker import datetime +import platform + +from sqlalchemy import ( + Date, + Float, + Column, + String, + Integer, + DateTime, + ForeignKey, + create_engine, +) +from sqlalchemy.orm import backref, relationship, sessionmaker +from pyexcel_io.reader import EncapsulatedSheetReader from pyexcel_io._compact import OrderedDict from pyexcel_io.database.common import ( SQLTableExporter, - SQLTableExportAdapter, SQLTableImporter, + SQLTableExportAdapter, SQLTableImportAdapter, ) +from sqlalchemy.ext.declarative import declarative_base +from pyexcel_io.database.querysets import QuerysetsReader +from pyexcel_io.database.exporters.queryset import QueryReader from pyexcel_io.database.exporters.sqlalchemy import ( - SQLTableReader, SQLBookReader, + SQLTableReader, ) from pyexcel_io.database.importers.sqlalchemy import ( - PyexcelSQLSkipRowException, - SQLTableWriter, SQLBookWriter, + SQLTableWriter, + PyexcelSQLSkipRowException, ) -from pyexcel_io.database.querysets import QuerysetsReader -from sqlalchemy.orm import relationship, backref -from nose.tools import raises, eq_ -import platform +from nose.tools import eq_, raises PY3 = sys.version_info[0] == 3 PY36 = PY3 and sys.version_info[1] == 6 @@ -117,7 +126,7 @@ class TestSingleRead: ["2014-11-12", 1, "Smith", 12.25], ] # 'pyexcel' here is the table name - assert list(data) == content + eq_(list(data), content) mysession.close() def test_sql_formating(self): @@ -127,8 +136,8 @@ class TestSingleRead: return [str(element) for element in row] # the key for this test case - sheet = SQLTableReader( - mysession, Pyexcel, row_renderer=custom_renderer + sheet = EncapsulatedSheetReader( + SQLTableReader(mysession, Pyexcel), row_renderer=custom_renderer ) data = sheet.to_array() content = [ @@ -141,7 +150,9 @@ class TestSingleRead: def test_sql_filter(self): mysession = Session() - sheet = SQLTableReader(mysession, Pyexcel, start_row=1) + sheet = EncapsulatedSheetReader( + SQLTableReader(mysession, Pyexcel), start_row=1 + ) data 
= sheet.to_array() content = [ ["2014-11-11", 0, "Adam", 11.25], @@ -153,7 +164,9 @@ class TestSingleRead: def test_sql_filter_1(self): mysession = Session() - sheet = SQLTableReader(mysession, Pyexcel, start_row=1, row_limit=1) + sheet = EncapsulatedSheetReader( + SQLTableReader(mysession, Pyexcel), start_row=1, row_limit=1 + ) data = sheet.to_array() content = [["2014-11-11", 0, "Adam", 11.25]] # 'pyexcel'' here is the table name @@ -162,7 +175,9 @@ class TestSingleRead: def test_sql_filter_2(self): mysession = Session() - sheet = SQLTableReader(mysession, Pyexcel, start_column=1) + sheet = EncapsulatedSheetReader( + SQLTableReader(mysession, Pyexcel), start_column=1 + ) data = sheet.to_array() content = [ ["id", "name", "weight"], @@ -175,8 +190,8 @@ class TestSingleRead: def test_sql_filter_3(self): mysession = Session() - sheet = SQLTableReader( - mysession, Pyexcel, start_column=1, column_limit=1 + sheet = EncapsulatedSheetReader( + SQLTableReader(mysession, Pyexcel), start_column=1, column_limit=1 ) data = sheet.to_array() content = [["id"], [0], [1]] @@ -212,6 +227,14 @@ class TestSingleWrite: reader = QuerysetsReader(query_sets, self.data[0]) results = reader.to_array() assert list(results) == self.results + + query_sets = mysession.query(Pyexcel).all() + query_reader = QueryReader(query_sets, None, column_names=self.data[0]) + result = read_all(query_reader) + for key in result: + result[key] = list(result[key]) + eq_(result, {"pyexcel_sheet1": self.results}) + query_reader.close() mysession.close() def test_update_existing_row(self): @@ -434,8 +457,7 @@ class TestMultipleRead: post_adapter.column_names = data["Post"][0] post_adapter.row_initializer = post_init_func importer.append(post_adapter) - writer = SQLBookWriter() - writer.open_content(importer) + writer = SQLBookWriter(importer, "sql") to_store = OrderedDict() to_store.update({category_adapter.get_name(): data["Category"][1:]}) to_store.update({post_adapter.get_name(): data["Post"][1:]}) @@ 
-448,12 +470,12 @@ class TestMultipleRead: exporter.append(category_adapter) post_adapter = SQLTableExportAdapter(Post) exporter.append(post_adapter) - book = SQLBookReader() - book.open_content(exporter) - data = book.read_all() - for key in data.keys(): - data[key] = list(data[key]) - assert json.dumps(data) == ( + reader = SQLBookReader(exporter, "sql") + result = read_all(reader) + for key in result: + result[key] = list(result[key]) + + assert json.dumps(result) == ( '{"category": [["id", "name"], [1, "News"], [2, "Sports"]], ' + '"post": [["body", "category_id", "id", "pub_date", "title"], ' + '["formal", 1, 1, "2015-01-20T23:28:29", "Title A"], ' @@ -543,13 +565,19 @@ def test_sql_table_import_adapter(): @raises(Exception) -def test_unknown_sheet(self): +def test_unknown_sheet(): importer = SQLTableImporter(None) category_adapter = SQLTableImportAdapter(Category) category_adapter.column_names = [""] importer.append(category_adapter) - writer = SQLBookWriter() - writer.open_content(importer) + writer = SQLBookWriter(importer, "sql") to_store = OrderedDict() to_store.update({"you do not see me": [[]]}) writer.write(to_store) + + +def read_all(reader): + result = OrderedDict() + for index, sheet in enumerate(reader.content_array): + result.update({sheet.name: reader.read_sheet(index).to_array()}) + return result