Merge branch 'wip/github'

This commit is contained in:
Frédéric Péters 2022-03-29 18:47:32 +02:00
commit 81bbd9e685
109 changed files with 3553 additions and 1982 deletions

4
.github/FUNDING.yml vendored Normal file
View File

@ -0,0 +1,4 @@
# These are supported funding model platforms
github: chfw
patreon: chfw

8
.github/PULL_REQUEST_TEMPLATE.md vendored Normal file
View File

@ -0,0 +1,8 @@
With your PR, here is a check list:
- [ ] Have test cases been written?
- [ ] Have all code lines been tested?
- [ ] Has `make format` been run?
- [ ] Please update CHANGELOG.yml (not CHANGELOG.rst)
- [ ] Has a fair amount of documentation been added if your change is complex?
- [ ] Agree to the NEW BSD License for your contribution

20
.github/workflows/lint.yml vendored Normal file
View File

@ -0,0 +1,20 @@
name: lint
on: [push, pull_request]
jobs:
lint:
runs-on: ubuntu-latest
name: lint code
steps:
- uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v1
with:
python-version: 3.8
- name: lint
run: |
pip --use-deprecated=legacy-resolver install flake8
pip --use-deprecated=legacy-resolver install -r tests/requirements.txt
flake8 --exclude=.moban.d,docs,setup.py --builtins=unicode,xrange,long .
python setup.py checkdocs

28
.github/workflows/moban-update.yml vendored Normal file
View File

@ -0,0 +1,28 @@
on: [push]
jobs:
run_moban:
runs-on: ubuntu-latest
name: synchronize templates via moban
steps:
- uses: actions/checkout@v2
with:
ref: ${{ github.head_ref }}
token: ${{ secrets.PAT }}
- name: Set up Python
uses: actions/setup-python@v1
with:
python-version: '3.7'
- name: check changes
run: |
pip install moban gitfs2 pypifs moban-jinja2-github moban-ansible
moban
git status
git diff --exit-code
- name: Auto-commit
if: failure()
uses: stefanzweifel/git-auto-commit-action@v4
with:
commit_message: >-
This is an auto-commit, updating project meta data,
such as changelog.rst, contributors.rst

30
.github/workflows/pythonpackage.yml vendored Normal file
View File

@ -0,0 +1,30 @@
name: Unit tests on ubuntu
on: [push]
jobs:
build:
runs-on: ubuntu-latest
strategy:
max-parallel: 4
matrix:
python-version: [3.6, 3.7, 3.8]
steps:
- uses: actions/checkout@v1
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v1
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
- name: Lint with flake8
run: |
make install_test format git-diff-check lint
- name: Test
run: |
pip install -r tests/requirements.txt
make

26
.github/workflows/pythonpublish.yml vendored Normal file
View File

@ -0,0 +1,26 @@
name: Upload Python Package
on:
release:
types: [created]
jobs:
deploy:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: Set up Python
uses: actions/setup-python@v1
with:
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel twine
- name: Build and publish
env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python setup.py sdist bdist_wheel
twine upload dist/*

35
.github/workflows/tests.yml vendored Normal file
View File

@ -0,0 +1,35 @@
name: Run unit tests on Windows and Mac
on: [push, pull_request]
jobs:
test:
strategy:
fail-fast: false
matrix:
python-version: [3.6, 3.7, 3.8, 3.9]
os: [macOs-latest, ubuntu-latest, windows-latest]
exclude:
- os: macOs-latest
python-version: 3.6
runs-on: ${{ matrix.os }}
name: run tests
steps:
- uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v1
with:
python-version: ${{ matrix.python-version }}
- name: install
run: |
pip --use-deprecated=legacy-resolver install -r requirements.txt
pip --use-deprecated=legacy-resolver install -r tests/requirements.txt
- name: test
run: |
pip freeze
nosetests --verbosity=3 --with-coverage --cover-package pyexcel_io --cover-package tests tests --with-doctest --doctest-extension=.rst README.rst docs/source pyexcel_io
- name: Upload coverage
uses: codecov/codecov-action@v1
with:
name: ${{ matrix.os }} Python ${{ matrix.python-version }}

552
.gitignore vendored
View File

@ -1,8 +1,546 @@
*.pyc
*~
# moban hashes
.moban.hashes
# Extra rules from https://github.com/github/gitignore/
# Python rules
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
pyexcel*-info
build
dist
tmp.db
.idea/*
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# VirtualEnv rules
# Virtualenv
# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
.Python
[Bb]in
[Ii]nclude
[Ll]ib
[Ll]ib64
[Ll]ocal
[Ss]cripts
pyvenv.cfg
.venv
pip-selfcheck.json
# Linux rules
*~
# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*
# KDE directory preferences
.directory
# Linux trash folder which might appear on any partition or disk
.Trash-*
# .nfs files are created when an open file is removed but is still being accessed
.nfs*
# Windows rules
# Windows thumbnail cache files
Thumbs.db
Thumbs.db:encryptable
ehthumbs.db
ehthumbs_vista.db
# Dump file
*.stackdump
# Folder config file
[Dd]esktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msix
*.msm
*.msp
# Windows shortcuts
*.lnk
# macOS rules
# General
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
# Emacs rules
# -*- mode: gitignore; -*-
*~
\#*\#
/.emacs.desktop
/.emacs.desktop.lock
*.elc
auto-save-list
tramp
.\#*
# Org-mode
.org-id-locations
*_archive
# flymake-mode
*_flymake.*
# eshell files
/eshell/history
/eshell/lastdir
# elpa packages
/elpa/
# reftex files
*.rel
# AUCTeX auto folder
/auto/
# cask packages
.cask/
dist/
# Flycheck
flycheck_*.el
# server auth directory
/server/
# projectiles files
.projectile
# directory configuration
.dir-locals.el
# network security
/network-security.data
# Vim rules
# Swap
[._]*.s[a-v][a-z]
# comment out the next line if you don't need vector files
!*.svg
[._]*.sw[a-p]
[._]s[a-rt-v][a-z]
[._]ss[a-gi-z]
[._]sw[a-p]
# Session
Session.vim
Sessionx.vim
# Temporary
.netrwhist
*~
# Auto-generated tag files
tags
# Persistent undo
[._]*.un~
# JetBrains rules
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser
# SublimeText rules
# Cache files for Sublime Text
*.tmlanguage.cache
*.tmPreferences.cache
*.stTheme.cache
# Workspace files are user-specific
*.sublime-workspace
# Project files should be checked into the repository, unless a significant
# proportion of contributors will probably not be using Sublime Text
# *.sublime-project
# SFTP configuration file
sftp-config.json
sftp-config-alt*.json
# Package control specific files
Package Control.last-run
Package Control.ca-list
Package Control.ca-bundle
Package Control.system-ca-bundle
Package Control.cache/
Package Control.ca-certs/
Package Control.merged-ca-bundle
Package Control.user-ca-bundle
oscrypto-ca-bundle.crt
bh_unicode_properties.cache
# Sublime-github package stores a github token in this file
# https://packagecontrol.io/packages/sublime-github
GitHub.sublime-settings
# KDevelop4 rules
*.kdev4
.kdev4/
# Kate rules
# Swap Files #
.*.kate-swp
.swp.*
# TextMate rules
*.tmproj
*.tmproject
tmtags
# VisualStudioCode rules
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace
# Local History for Visual Studio Code
.history/
# Xcode rules
# Xcode
#
# gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore
## User settings
xcuserdata/
## compatibility with Xcode 8 and earlier (ignoring not required starting Xcode 9)
*.xcscmblueprint
*.xccheckout
## compatibility with Xcode 3 and earlier (ignoring not required starting Xcode 4)
build/
DerivedData/
*.moved-aside
*.pbxuser
!default.pbxuser
*.mode1v3
!default.mode1v3
*.mode2v3
!default.mode2v3
*.perspectivev3
!default.perspectivev3
## Gcc Patch
/*.gcno
# Eclipse rules
.metadata
bin/
tmp/
*.tmp
*.bak
*.swp
*~.nib
local.properties
.settings/
.loadpath
.recommenders
# External tool builders
.externalToolBuilders/
# Locally stored "Eclipse launch configurations"
*.launch
# PyDev specific (Python IDE for Eclipse)
*.pydevproject
# CDT-specific (C/C++ Development Tooling)
.cproject
# CDT- autotools
.autotools
# Java annotation processor (APT)
.factorypath
# PDT-specific (PHP Development Tools)
.buildpath
# sbteclipse plugin
.target
# Tern plugin
.tern-project
# TeXlipse plugin
.texlipse
# STS (Spring Tool Suite)
.springBeans
# Code Recommenders
.recommenders/
# Annotation Processing
.apt_generated/
.apt_generated_test/
# Scala IDE specific (Scala & Java development for Eclipse)
.cache-main
.scala_dependencies
.worksheet
# Uncomment this line if you wish to ignore the project description file.
# Typically, this file would be tracked if it contains build/dependency configurations:
#.project
# TortoiseGit rules
# Project-level settings
/.tgitconfig
# Tags rules
# Ignore tags created by etags, ctags, gtags (GNU global) and cscope
TAGS
.TAGS
!TAGS/
tags
.tags
!tags/
gtags.files
GTAGS
GRTAGS
GPATH
GSYMS
cscope.files
cscope.out
cscope.in.out
cscope.po.out
# remove moban hash dictionary
.moban.hashes

10
.isort.cfg Normal file
View File

@ -0,0 +1,10 @@
[settings]
line_length=79
known_first_party=lml, pyexcel
known_third_party=nose
indent=' '
multi_line_output=3
length_sort=1
default_section=FIRSTPARTY
no_lines_before=LOCALFOLDER
sections=FUTURE,STDLIB,FIRSTPARTY,THIRDPARTY,LOCALFOLDER

View File

@ -0,0 +1,13 @@
{% extends "travis.yml.jj2" %}
{%block extra_matrix %}
env:
- MINREQ=1
{%endblock%}
{%block custom_python_versions%}
python:
- 3.8
- 3.7
- 3.6
{%endblock%}
{%block pypi_deployment%}
{%endblock %}

View File

@ -12,4 +12,3 @@ def setup(app):
{%endblock%}

View File

@ -57,7 +57,9 @@ get_data(.., library='pyexcel-ods')
============= ======= ======== ======= ======== ======== ========
`pyexcel-io`_ `xls`_ `xlsx`_ `ods`_ `ods3`_ `odsr`_ `xlsxw`_
============= ======= ======== ======= ======== ======== ========
0.5.1 0.5.0 0.5.0 0.5.0 0.5.0 0.5.0 0.5.0
0.6.0+ 0.5.0+ 0.5.0+ 0.5.4 0.5.3 0.5.0+ 0.5.0+
0.5.10+ 0.5.0+ 0.5.0+ 0.5.4 0.5.3 0.5.0+ 0.5.0+
0.5.1+ 0.5.0+ 0.5.0+ 0.5.0+ 0.5.0+ 0.5.0+ 0.5.0+
0.4.x 0.4.x 0.4.x 0.4.x 0.4.x 0.4.x 0.4.x
0.3.0+ 0.3.0+ 0.3.0 0.3.0+ 0.3.0+ 0.3.0 0.3.0
0.2.2+ 0.2.2+ 0.2.2+ 0.2.1+ 0.2.1+ 0.0.1
@ -106,6 +108,7 @@ API
.. autosummary::
:toctree: api/
iget_data
get_data
save_data

View File

@ -6,4 +6,3 @@
{%block pyexcel_extra_classifiers%}
'Programming Language :: Python :: Implementation :: PyPy'
{%endblock%}}

View File

@ -1,6 +0,0 @@
{% extends 'tests/requirements.txt.jj2' %}
{%block extras %}
SQLAlchemy
pyexcel>=0.2.0
pyexcel-xls>=0.1.0
{%endblock%}

View File

@ -1,26 +1,9 @@
requires:
- type: git
url: https://github.com/moremoban/pypi-mobans
submodule: true
- https://github.com/pyexcel/pyexcel-mobans
overrides: "git://github.com/pyexcel/pyexcel-mobans!/mobanfile.yaml"
configuration:
configuration_dir: "pyexcel-mobans:config"
template_dir:
- "pyexcel-mobans:templates"
- "pypi-mobans:templates"
- ".moban.d"
configuration: pyexcel-io.yml
targets:
- "docs/source/conf.py": "docs/source/conf.py"
- setup.py: setup.py
- .travis.yml: travis.yml.jj2
- requirements.txt: requirements.txt.jj2
- "tests/requirements.txt": "tests/requirements.txt"
- LICENSE: NEW_BSD_LICENSE.jj2
- test.sh: test.script.jj2
- test.bat: test.script.jj2
- README.rst: README.rst
- "docs/source/index.rst": "docs/source/index.rst"
- output: CHANGELOG.rst
configuration: changelog.yml
template: CHANGELOG.rst.jj2
- "docs/source/conf.py": "docs/source/custom_conf.py.jj2"
- setup.py: io_setup.py.jj2
- README.rst: io_readme.rst.jj2
- "docs/source/index.rst": "docs/source/index.rst.jj2"
- .gitignore: gitignore.jj2

18
.readthedocs.yml Normal file
View File

@ -0,0 +1,18 @@
# .readthedocs.yml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
# Required
version: 2
# Build documentation in the docs/ directory with Sphinx
sphinx:
configuration: docs/source/conf.py
# Optionally build your docs in additional formats such as PDF
formats:
- pdf
# Optionally set the version of Python and requirements required to build your docs
python:
version: 3.7

View File

@ -1,23 +0,0 @@
sudo: false
language: python
notifications:
email: false
python:
- pypy-5.3.1
- 3.7-dev
- 3.6
- 3.5
- 3.4
- 2.7
before_install:
- if [[ $TRAVIS_PYTHON_VERSION == "2.6" ]]; then pip install flake8==2.6.2; fi
- if [[ -f min_requirements.txt && "$MINREQ" -eq 1 ]]; then
mv min_requirements.txt requirements.txt ;
fi
- test ! -f rnd_requirements.txt || pip install --no-deps -r rnd_requirements.txt
- test ! -f rnd_requirements.txt || pip install -r rnd_requirements.txt ;
- pip install -r tests/requirements.txt
script:
- make test
after_success:
codecov

View File

@ -1,11 +1,134 @@
Change log
================================================================================
0.6.6 - 31.1.2022
--------------------------------------------------------------------------------
**updated**
#. `#112 <https://github.com/pyexcel/pyexcel-io/issues/112>`_: Log Empty Row
Warning instead of 'print'
0.6.5 - 08.10.2021
--------------------------------------------------------------------------------
**updated**
#. `#109 <https://github.com/pyexcel/pyexcel-io/issues/109>`_: enable ods3 to
have datetime
0.6.4 - 31.10.2020
--------------------------------------------------------------------------------
**updated**
#. `#102 <https://github.com/pyexcel/pyexcel-io/issues/102>`_: skip columns from
imported excel sheet.
0.6.3 - 12.10.2020
--------------------------------------------------------------------------------
**fixed**
#. `#96 <https://github.com/pyexcel/pyexcel-io/issues/96>`_: regression: unknown
file type shall trigger NoSupportingPluginFound
**updated**
#. extra dependencies uses 0.6.0 based plugins
0.6.2 - 7.10.2020
--------------------------------------------------------------------------------
**updated**
#. `#94 <https://github.com/pyexcel/pyexcel-io/issues/94>`_: keep backward
compatibility for pyexcel-xls 0.4.1
0.6.1 - 7.10.2020
--------------------------------------------------------------------------------
**removed**
#. Python versions lower than 3.6 are no longer supported
**updated**
#. pyexcel-io plugin interface has been rewritten. PyInstaller users will be
impacted. Please read 'Packaging with Pyinstaller' in the documentation.
#. new query set reader plugin. pyexcel<=0.6.4 has used intrusive way of getting
query set source done. it is against the plugin interface.
**fixed**
#. `#74 <https://github.com/pyexcel/pyexcel-io/issues/74>`_: handle zip files
which contain non-UTF-8 encoded files.
**added**
#. `#86 <https://github.com/pyexcel/pyexcel-io/issues/86>`_: allow trailing
options, get_data(...keep_trailing_empty_cells=True).
0.5.20 - 17.7.2019
--------------------------------------------------------------------------------
**updated**
#. `#70 <https://github.com/pyexcel/pyexcel-io/issues/70>`_: when the given file
is a root directory, the error shall read it is not a file
0.5.19 - 14.7.2019
--------------------------------------------------------------------------------
**updated**
#. `pyexcel#185 <https://github.com/pyexcel/pyexcel/issues/185>`_: handle stream
conversion if file type(html) needs string content then bytes to handle
0.5.18 - 12.06.2019
--------------------------------------------------------------------------------
**updated**
#. `#69 <https://github.com/pyexcel/pyexcel-io/issues/69>`_: Force file
type(force_file_type) on write
0.5.17 - 04.04.2019
--------------------------------------------------------------------------------
**updated**
#. `#68 <https://github.com/pyexcel/pyexcel-io/issues/68>`_: Raise IOError when
the data file does not exist
0.5.16 - 19.03.2019
--------------------------------------------------------------------------------
**updated**
#. `#67 <https://github.com/pyexcel/pyexcel-io/issues/67>`_: fix conversion
issue for long type on python 2.7 for ods
0.5.15 - 16.03.2019
--------------------------------------------------------------------------------
**updated**
#. `pyexcel-ods#33 <https://github.com/pyexcel/pyexcel-ods/issues/33>`_: fix
integer comparison error on i586
0.5.14 - 21.02.2019
--------------------------------------------------------------------------------
**updated**
#. `#65 <https://github.com/pyexcel/pyexcel-io/issues/65>`_: add
tests/__init__.py because python2.7 setup.py test needs it
0.5.13 - 12.02.2019
--------------------------------------------------------------------------------
updated
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**updated**
#. `#63 <https://github.com/pyexcel/pyexcel-io/issues/63>`_: Version 0.5.12
prevents xlsx and ods plugins from being loaded
@ -13,19 +136,17 @@ updated
0.5.12 - 9.02.2019
--------------------------------------------------------------------------------
updated
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**updated**
#. `#60 <https://github.com/pyexcel/pyexcel-io/issues/60>`_: include tests in
tar ball
#. `#61 <https://github.com/pyexcel/pyexcel-io/issues/61>`_: enable python
setup.py test
0.5.10 - 3.12.2018
0.5.11 - 3.12.2018
--------------------------------------------------------------------------------
updated
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**updated**
#. `#59 <https://github.com/pyexcel/pyexcel-io/issues/59>`_: Please use
scan_plugins_regex, which lml 0.7 complains about
@ -33,8 +154,7 @@ updated
0.5.10 - 27.11.2018
--------------------------------------------------------------------------------
added
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**added**
#. `#57 <https://github.com/pyexcel/pyexcel-io/issues/57>`_, long type will not
be written in ods. please use string type. And if the integer is equal or
@ -45,8 +165,7 @@ added
0.5.9.1 - 30.08.2018
--------------------------------------------------------------------------------
updated
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**updated**
#. `#53 <https://github.com/pyexcel/pyexcel-io/issues/53>`_, upgrade lml
dependency to at least 0.0.2
@ -54,8 +173,7 @@ updated
0.5.9 - 23.08.2018
--------------------------------------------------------------------------------
added
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**added**
#. `pyexcel#148 <https://github.com/pyexcel/pyexcel/issues/148>`_, support
force_file_type
@ -63,8 +181,7 @@ added
0.5.8 - 16.08.2018
--------------------------------------------------------------------------------
added
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**added**
#. `#49 <https://github.com/pyexcel/pyexcel-io/issues/49>`_, support additional
options when detecting float values in csv format. default_float_nan,
@ -73,8 +190,7 @@ added
0.5.7 - 02.05.2018
--------------------------------------------------------------------------------
fixed
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**fixed**
#. `#48 <https://github.com/pyexcel/pyexcel-io/issues/48>`_, turn off pep 0515
#. `#47 <https://github.com/pyexcel/pyexcel-io/issues/47>`_, csv reader cannot
@ -83,8 +199,7 @@ fixed
0.5.6 - 11.01.2018
--------------------------------------------------------------------------------
fixed
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**fixed**
#. `#46 <https://github.com/pyexcel/pyexcel-io/issues/46>`_, expose `bulk_save`
to developer
@ -92,8 +207,7 @@ fixed
0.5.5 - 23.12.2017
--------------------------------------------------------------------------------
fixed
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**fixed**
#. Issue `#45 <https://github.com/pyexcel/pyexcel-io/issues/45>`_, csv reader
throws exception because google app engine does not support mmap. People who
@ -103,8 +217,7 @@ fixed
0.5.4 - 10.11.2017
--------------------------------------------------------------------------------
updated
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**updated**
#. PR `#44 <https://github.com/pyexcel/pyexcel-io/pull/44>`_, use unicodewriter
for csvz writers.
@ -112,8 +225,7 @@ updated
0.5.3 - 23.10.2017
--------------------------------------------------------------------------------
updated
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**updated**
#. pyexcel `pyexcel#105 <https://github.com/pyexcel/pyexcel/issues/105>`_,
remove gease from setup_requires, introduced by 0.5.2.
@ -122,8 +234,7 @@ updated
0.5.2 - 20.10.2017
--------------------------------------------------------------------------------
added
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**added**
#. `pyexcel#103 <https://github.com/pyexcel/pyexcel/issues/103>`_, include
LICENSE file in MANIFEST.in, meaning LICENSE file will appear in the released
@ -132,8 +243,7 @@ added
0.5.1 - 02.09.2017
--------------------------------------------------------------------------------
Fixed
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Fixed**
#. `pyexcel-ods#25 <https://github.com/pyexcel/pyexcel-ods/issues/25>`_,
Unwanted dependency on pyexcel.
@ -141,13 +251,11 @@ Fixed
0.5.0 - 30.08.2017
--------------------------------------------------------------------------------
Added
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Added**
#. Collect all data type conversion codes as service.py.
Updated
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Updated**
#. `#19 <https://github.com/pyexcel/pyexcel-io/issues/19>`_, use cString by
default. For python, it will be a performance boost
@ -155,8 +263,7 @@ Updated
0.4.4 - 08.08.2017
--------------------------------------------------------------------------------
Updated
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Updated**
#. `#42 <https://github.com/pyexcel/pyexcel-io/issues/42>`_, raise exception if
database table name does not match the sheet name
@ -164,8 +271,7 @@ Updated
0.4.3 - 29.07.2017
--------------------------------------------------------------------------------
Updated
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Updated**
#. `#41 <https://github.com/pyexcel/pyexcel-io/issues/41>`_, walk away
gracefully when mmap is not available.
@ -173,8 +279,7 @@ Updated
0.4.2 - 05.07.2017
--------------------------------------------------------------------------------
Updated
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Updated**
#. `#37 <https://github.com/pyexcel/pyexcel-io/issues/37>`_, permanently fix the
residue folder pyexcel by release all future releases in a clean clone.
@ -182,8 +287,7 @@ Updated
0.4.1 - 29.06.2017
--------------------------------------------------------------------------------
Updated
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Updated**
#. `#39 <https://github.com/pyexcel/pyexcel-io/issues/39>`_, raise exception
when bulk save in django fails. Please `bulk_save=False` if you as the
@ -195,8 +299,7 @@ Updated
0.4.0 - 19.06.2017
--------------------------------------------------------------------------------
Updated
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Updated**
#. 'built-in' as the value to the parameter 'library' as parameter to invoke
pyexcel-io's built-in csv, tsv, csvz, tsvz, django and sql won't work. It is
@ -207,16 +310,14 @@ Updated
handle are made sure to be closed. File close mechanism is enforced.
#. iget_data function is introduced to cope with dangling file handle issue.
Removed
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Removed**
#. Removed plugin loading code and lml is used instead
0.3.4 - 18.05.2017
--------------------------------------------------------------------------------
Updated
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Updated**
#. `#33 <https://github.com/pyexcel/pyexcel-io/issues/33>`_, handle mmap object
differently given as file content. This issue has put in a priority to single
@ -232,16 +333,14 @@ Updated
0.3.3 - 30.03.2017
--------------------------------------------------------------------------------
Updated
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Updated**
#. `#31 <https://github.com/pyexcel/pyexcel-io/issues/31>`_, support pyinstaller
0.3.2 - 26.01.2017
--------------------------------------------------------------------------------
Updated
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Updated**
#. `#29 <https://github.com/pyexcel/pyexcel-io/issues/29>`_, change
skip_empty_rows to False by default
@ -249,13 +348,11 @@ Updated
0.3.1 - 21.01.2017
--------------------------------------------------------------------------------
Added
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Added**
#. updated versions of extra packages
Updated
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Updated**
#. `#23 <https://github.com/pyexcel/pyexcel-io/issues/23>`_, provide helpful
message when old pyexcel plugin exists
@ -264,8 +361,7 @@ Updated
0.3.0 - 22.12.2016
--------------------------------------------------------------------------------
Added
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Added**
#. lazy loading of plugins. for example, pyexcel-xls is not entirely loaded
until xls format is used at its first attempted reading or writing. Since it
@ -276,16 +372,14 @@ Added
0.2.6 - 21.12.2016
--------------------------------------------------------------------------------
Updated
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Updated**
#. `#24 <https://github.com/pyexcel/pyexcel-io/issues/24>`__, pass on batch_size
0.2.5 - 20.12.2016
--------------------------------------------------------------------------------
Updated
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Updated**
#. `#26 <https://github.com/pyexcel/pyexcel-io/issues/26>`__, performance issue
with getting the number of columns.
@ -293,8 +387,7 @@ Updated
0.2.4 - 24.11.2016
--------------------------------------------------------------------------------
Updated
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Updated**
#. `#23 <https://github.com/pyexcel/pyexcel-io/issues/23>`__, Failed to convert
long integer string in python 2 to its actual value
@ -302,8 +395,7 @@ Updated
0.2.3 - 16.09.2016
--------------------------------------------------------------------------------
Added
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Added**
#. `#21 <https://github.com/pyexcel/pyexcel-io/issues/21>`__, choose subset from
data base tables for export
@ -313,16 +405,14 @@ Added
0.2.2 - 31.08.2016
--------------------------------------------------------------------------------
Added
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Added**
#. support pagination. two pairs: start_row, row_limit and start_column,
column_limit help you deal with large files.
#. `skip_empty_rows=True` was introduced. To include empty rows, put it to
False.
Updated
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Updated**
#. `#20 <https://github.com/pyexcel/pyexcel-io/issues/20>`__, pyexcel-io
attempts to parse cell contents of 'infinity' as a float/int, crashes
@ -330,8 +420,7 @@ Updated
0.2.1 - 11.07.2016
--------------------------------------------------------------------------------
Added
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Added**
#. csv format: handle utf-16 encoded csv files. Potentially being able to decode
other formats if correct "encoding" is provided
@ -339,8 +428,7 @@ Added
supported
#. support stdin as input stream and stdout as output stream
Updated
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Updated**
#. Attention, user of pyexcel-io! No longer io stream validation is performed in
python 3. The guideline is: io.StringIO for csv, tsv only, otherwise BytesIO
@ -352,8 +440,7 @@ Updated
0.2.0 - 01.06.2016
--------------------------------------------------------------------------------
Added
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Added**
#. autoload of pyexcel-io plugins
#. auto detect `datetime`, `float` and `int`. Detection can be switched off by
@ -362,7 +449,6 @@ Added
0.1.0 - 17.01.2016
--------------------------------------------------------------------------------
Added
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Added**
#. yield key word to return generator as content

13
CONTRIBUTORS.rst Normal file
View File

@ -0,0 +1,13 @@
6 contributors
================================================================================
In alphabetical order:
* `Craig Anderson <https://github.com/craiga>`_
* `John Vandenberg <https://github.com/jayvdb>`_
* `Stephen J. Fuhry <https://github.com/fuhrysteve>`_
* `Stephen Rauch <https://github.com/stephenrauch>`_
* `Vincent Raspal <https://github.com/vinraspa>`_
* `Víctor Antonio Hernández Monroy <https://github.com/antherkiv>`_

View File

@ -1,4 +1,4 @@
Copyright (c) 2015-2019 by Onni Software Ltd. and its contributors
Copyright (c) 2015-2022 by Onni Software Ltd. and its contributors
All rights reserved.
Redistribution and use in source and binary forms of the software as well
@ -13,7 +13,7 @@ that the following conditions are met:
and/or other materials provided with the distribution.
* Neither the name of 'pyexcel-io' nor the names of the contributors
may be used to endorse or promote products derived from this software
may not be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE AND DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND

View File

@ -1,8 +1,8 @@
include README.rst
include LICENSE
include CHANGELOG.rst
include CONTRIBUTORS.rst
recursive-include tests *
include docs/source/*
recursive-include docs *
include Makefile
include test.sh

View File

@ -1,8 +1,16 @@
all: test
test:
test: lint
bash test.sh
document:
sphinx-autogen -o docs/source/generated/ docs/source/*.rst
sphinx-build -b html docs/source/ docs/build/
install_test:
pip install -r tests/requirements.txt
lint:
bash lint.sh
format:
bash format.sh
git-diff-check:
git diff --exit-code

View File

@ -3,20 +3,37 @@ pyexcel-io - Let you focus on data, instead of file formats
================================================================================
.. image:: https://raw.githubusercontent.com/pyexcel/pyexcel.github.io/master/images/patreon.png
:target: https://www.patreon.com/pyexcel
:target: https://www.patreon.com/chfw
.. image:: https://api.bountysource.com/badge/team?team_id=288537
:target: https://salt.bountysource.com/teams/chfw-pyexcel
.. image:: https://raw.githubusercontent.com/pyexcel/pyexcel-mobans/master/images/awesome-badge.svg
:target: https://awesome-python.com/#specific-formats-processing
.. image:: https://travis-ci.org/pyexcel/pyexcel-io.svg?branch=master
:target: http://travis-ci.org/pyexcel/pyexcel-io
.. image:: https://github.com/pyexcel/pyexcel-io/workflows/run_tests/badge.svg
:target: http://github.com/pyexcel/pyexcel-io/actions
.. image:: https://codecov.io/gh/pyexcel/pyexcel-io/branch/master/graph/badge.svg
:target: https://codecov.io/gh/pyexcel/pyexcel-io
.. image:: https://badge.fury.io/py/pyexcel-io.svg
:target: https://pypi.org/project/pyexcel-io
.. image:: https://anaconda.org/conda-forge/pyexcel-io/badges/version.svg
:target: https://anaconda.org/conda-forge/pyexcel-io
.. image:: https://pepy.tech/badge/pyexcel-io/month
:target: https://pepy.tech/project/pyexcel-io
.. image:: https://anaconda.org/conda-forge/pyexcel-io/badges/downloads.svg
:target: https://anaconda.org/conda-forge/pyexcel-io
.. image:: https://img.shields.io/gitter/room/gitterHQ/gitter.svg
:target: https://gitter.im/pyexcel/Lobby
.. image:: https://img.shields.io/static/v1?label=continuous%20templating&message=%E6%A8%A1%E7%89%88%E6%9B%B4%E6%96%B0&color=blue&style=flat-square
:target: https://moban.readthedocs.io/en/latest/#at-scale-continous-templating-for-open-source-projects
.. image:: https://img.shields.io/static/v1?label=coding%20style&message=black&color=black&style=flat-square
:target: https://github.com/psf/black
.. image:: https://readthedocs.org/projects/pyexcel-io/badge/?version=latest
:target: http://pyexcel-io.readthedocs.org/en/latest/
@ -24,7 +41,7 @@ Support the project
================================================================================
If your company has embedded pyexcel and its components into a revenue generating
product, please support me on `patreon <https://www.patreon.com/bePatron?u=5537627>`_
product, please support me on github, `patreon <https://www.patreon.com/bePatron?u=5537627>`_
or `bounty source <https://salt.bountysource.com/teams/chfw-pyexcel>`_ to maintain
the project and develop it further.
@ -43,6 +60,8 @@ Known constraints
Fonts, colors and charts are not supported.
Nor to read password protected xls, xlsx and ods files.
Introduction
================================================================================
@ -57,48 +76,47 @@ sqlalchemy supported databases. Its supported file formats are extended to cover
.. table:: A list of file formats supported by external plugins
======================== ======================= ================= ==================
Package name Supported file formats Dependencies Python versions
======================== ======================= ================= ==================
`pyexcel-io`_ csv, csvz [#f1]_, tsv, 2.6, 2.7, 3.3,
tsvz [#f2]_ 3.4, 3.5, 3.6
pypy
`pyexcel-xls`_ xls, xlsx(read only), `xlrd`_, same as above
======================== ======================= =================
Package name Supported file formats Dependencies
======================== ======================= =================
`pyexcel-io`_ csv, csvz [#f1]_, tsv,
tsvz [#f2]_
`pyexcel-xls`_ xls, xlsx(read only), `xlrd`_,
xlsm(read only) `xlwt`_
`pyexcel-xlsx`_ xlsx `openpyxl`_ same as above
`pyexcel-ods3`_ ods `pyexcel-ezodf`_, 2.6, 2.7, 3.3, 3.4
lxml 3.5, 3.6
`pyexcel-ods`_ ods `odfpy`_ same as above
======================== ======================= ================= ==================
`pyexcel-xlsx`_ xlsx `openpyxl`_
`pyexcel-ods3`_ ods `pyexcel-ezodf`_,
lxml
`pyexcel-ods`_ ods `odfpy`_
======================== ======================= =================
.. table:: Dedicated file reader and writers
======================== ======================= ================= ==================
Package name Supported file formats Dependencies Python versions
======================== ======================= ================= ==================
`pyexcel-xlsxw`_ xlsx(write only) `XlsxWriter`_ Python 2 and 3
`pyexcel-xlsxr`_ xlsx(read only) lxml same as above
`pyexcel-odsr`_ read only for ods, fods lxml same as above
`pyexcel-htmlr`_ html(read only) lxml,html5lib same as above
======================== ======================= ================= ==================
======================== ======================= =================
Package name Supported file formats Dependencies
======================== ======================= =================
`pyexcel-xlsxw`_ xlsx(write only) `XlsxWriter`_
`pyexcel-libxlsxw`_ xlsx(write only) `libxlsxwriter`_
`pyexcel-xlsxr`_ xlsx(read only) lxml
`pyexcel-xlsbr`_ xlsb(read only) pyxlsb
`pyexcel-odsr`_ read only for ods, fods lxml
`pyexcel-odsw`_ write only for ods loxun
`pyexcel-htmlr`_ html(read only) lxml,html5lib
`pyexcel-pdfr`_ pdf(read only) camelot
======================== ======================= =================
.. _pyexcel-io: https://github.com/pyexcel/pyexcel-io
.. _pyexcel-xls: https://github.com/pyexcel/pyexcel-xls
.. _pyexcel-xlsx: https://github.com/pyexcel/pyexcel-xlsx
.. _pyexcel-ods: https://github.com/pyexcel/pyexcel-ods
.. _pyexcel-ods3: https://github.com/pyexcel/pyexcel-ods3
.. _pyexcel-odsr: https://github.com/pyexcel/pyexcel-odsr
.. _pyexcel-xlsxw: https://github.com/pyexcel/pyexcel-xlsxw
.. _pyexcel-xlsxr: https://github.com/pyexcel/pyexcel-xlsxr
.. _pyexcel-htmlr: https://github.com/pyexcel/pyexcel-htmlr
Plugin shopping guide
------------------------
.. _xlrd: https://github.com/python-excel/xlrd
.. _xlwt: https://github.com/python-excel/xlwt
.. _openpyxl: https://bitbucket.org/openpyxl/openpyxl
.. _XlsxWriter: https://github.com/jmcnamara/XlsxWriter
.. _pyexcel-ezodf: https://github.com/pyexcel/pyexcel-ezodf
.. _odfpy: https://github.com/eea/odfpy
Since 2020, all pyexcel-io plugins have dropped the support for python versions
which are lower than 3.6. If you want to use any of those Python versions, please use pyexcel-io
and its plugins versions that are lower than 0.6.0.
Except csv files, xls, xlsx and ods files are a zip of a folder containing a lot of
xml files
The dedicated readers for excel files can stream read
In order to manage the list of plugins installed, you need to use pip to add or remove
@ -108,6 +126,32 @@ in your environment, you need to tell pyexcel which plugin to use per function c
For example, pyexcel-ods and pyexcel-odsr, and you want to get_array to use pyexcel-odsr.
You need to append get_array(..., library='pyexcel-odsr').
.. _pyexcel-io: https://github.com/pyexcel/pyexcel-io
.. _pyexcel-xls: https://github.com/pyexcel/pyexcel-xls
.. _pyexcel-xlsx: https://github.com/pyexcel/pyexcel-xlsx
.. _pyexcel-ods: https://github.com/pyexcel/pyexcel-ods
.. _pyexcel-ods3: https://github.com/pyexcel/pyexcel-ods3
.. _pyexcel-odsr: https://github.com/pyexcel/pyexcel-odsr
.. _pyexcel-odsw: https://github.com/pyexcel/pyexcel-odsw
.. _pyexcel-pdfr: https://github.com/pyexcel/pyexcel-pdfr
.. _pyexcel-xlsxw: https://github.com/pyexcel/pyexcel-xlsxw
.. _pyexcel-libxlsxw: https://github.com/pyexcel/pyexcel-libxlsxw
.. _pyexcel-xlsxr: https://github.com/pyexcel/pyexcel-xlsxr
.. _pyexcel-xlsbr: https://github.com/pyexcel/pyexcel-xlsbr
.. _pyexcel-htmlr: https://github.com/pyexcel/pyexcel-htmlr
.. _xlrd: https://github.com/python-excel/xlrd
.. _xlwt: https://github.com/python-excel/xlwt
.. _openpyxl: https://bitbucket.org/openpyxl/openpyxl
.. _XlsxWriter: https://github.com/jmcnamara/XlsxWriter
.. _pyexcel-ezodf: https://github.com/pyexcel/pyexcel-ezodf
.. _odfpy: https://github.com/eea/odfpy
.. _libxlsxwriter: http://libxlsxwriter.github.io/getting_started.html
.. rubric:: Footnotes
.. [#f1] zipped csv file
@ -161,7 +205,7 @@ Then install relevant development requirements:
#. pip install -r tests/requirements.txt
Once you have finished your changes, please provide test case(s), relevant documentation
and update CHANGELOG.rst.
and update changelog.yml
.. note::
@ -180,41 +224,20 @@ On Linux/Unix systems, please launch your tests like this::
$ make
On Windows systems, please issue this command::
On Windows, please issue this command::
> test.bat
How to update test environment and update documentation
---------------------------------------------------------
Additional steps are required:
Before you commit
------------------------------
#. pip install moban
#. git clone https://github.com/moremoban/setupmobans.git # generic setup
#. git clone https://github.com/pyexcel/pyexcel-commons.git commons
#. make your changes in `.moban.d` directory, then issue command `moban`
Please run::
What is pyexcel-commons
---------------------------------
$ make format
Many information that are shared across pyexcel projects, such as: this developer guide, license info, etc. are stored in `pyexcel-commons` project.
so as to beautify your code, otherwise your build may fail your unit test.
What is .moban.d
---------------------------------
`.moban.d` stores the specific meta data for the library.
Acceptance criteria
-------------------
#. Has Test cases written
#. Has all code lines tested
#. Passes all Travis CI builds
#. Has fair amount of documentation if your change is complex
#. run 'make format' so as to confirm the pyexcel organisation's coding style
#. Please update CHANGELOG.rst
#. Please add yourself to CONTRIBUTORS.rst
#. Agree on NEW BSD License for your contribution

View File

@ -1,6 +1,99 @@
name: pyexcel-io
organisation: pyexcel
releases:
- changes:
- action: updated
details:
- "`#112`: Log Empty Row Warning instead 'print' "
version: 0.6.6
date: 31.1.2022
- changes:
- action: updated
details:
- "`#109`: enable ods3 to have datetime"
version: 0.6.5
date: 08.10.2021
- changes:
- action: updated
details:
- "`#102`: skip columns from imported excel sheet."
version: 0.6.4
date: 31.10.2020
- changes:
- action: fixed
details:
- "`#96`: regression: unknown file type shall trigger NoSupportingPluginFound"
- action: updated
details:
- "extra dependencies uses 0.6.0 based plugins"
version: 0.6.3
date: 12.10.2020
- changes:
- action: updated
details:
- "`#94`: keep backward compatibility for pyexcel-xls 0.4.1"
version: 0.6.2
date: 7.10.2020
- changes:
- action: removed
details:
- 'python 3.6 lower versions are no longer supported'
- action: updated
details:
- pyexcel-io plugin interface has been rewritten. PyInstaller user will be impacted.
please read 'Packaging with Pyinstaller' in the documentation.
- new query set reader plugin. pyexcel<=0.6.4 has used intrusive way of getting query set
source done. it is against the plugin interface.
- action: fixed
details:
- "`#74`: handle zip files which contain non-UTF-8 encoded files."
- action: added
details:
- "`#86`: allow trailing options, get_data(...keep_trailing_empty_cells=True)."
version: 0.6.1
date: 7.10.2020
- changes:
- action: updated
details:
- '`#70`: when the given file is a root directory, the error shall read it is not a file'
version: 0.5.20
date: 17.7.2019
- changes:
- action: updated
details:
- '`pyexcel#185`: handle stream conversion if file type(html) needs string content then bytes to handle'
version: 0.5.19
date: 14.7.2019
- changes:
- action: updated
details:
- '`#69`: Force file type(force_file_type) on write'
version: 0.5.18
date: 12.06.2019
- changes:
- action: updated
details:
- '`#68`: Raise IOError when the data file does not exist'
version: 0.5.17
date: 04.04.2019
- changes:
- action: updated
details:
- '`#67`: fix conversion issue for long type on python 2.7 for ods'
version: 0.5.16
date: 19.03.2019
- changes:
- action: updated
details:
- '`pyexcel-ods#33`: fix integer comparison error on i586'
version: 0.5.15
date: 16.03.2019
- changes:
- action: updated
details:
- '`#65`: add tests/__init__.py because python2.7 setup.py test needs it'
version: 0.5.14
date: 21.02.2019
- changes:
- action: updated
details:
@ -18,7 +111,7 @@ releases:
- action: updated
details:
- '`#59`: Please use scan_plugins_regex, which lml 0.7 complains about'
version: 0.5.10
version: 0.5.11
date: 3.12.2018
- changes:
- action: added

View File

@ -1,5 +1,5 @@
pyexcel_io.get_data
===================
pyexcel\_io.get\_data
=====================
.. currentmodule:: pyexcel_io

View File

@ -0,0 +1,6 @@
pyexcel\_io.iget\_data
======================
.. currentmodule:: pyexcel_io
.. autofunction:: iget_data

View File

@ -1,5 +1,5 @@
pyexcel_io.save_data
====================
pyexcel\_io.save\_data
======================
.. currentmodule:: pyexcel_io

View File

@ -2,9 +2,26 @@ Common parameters
================================================================================
'library' option is added
--------------------------------------------------------------------------------
In order to have overlapping plugins co-exist, 'library' option is added to
get_data and save_data.
get_data only parameters
--------------------------------------------------------------------------------
keep_trailing_empty_cells
********************************************************************************
default: False
If turned on, the return data will contain trailing empty cells.
auto_detect_datetime
--------------------------------------------------------------------------------
********************************************************************************
The datetime formats are:
@ -14,11 +31,6 @@ The datetime formats are:
Any other datetime formats will be thrown as ValueError
'library' option is added
--------------------------------------------------------------------------------
In order to have overlapping plugins co-exit, 'library' option is added to
get_data and save_data.
csv only parameters
--------------------------------------------------------------------------------

View File

@ -4,13 +4,11 @@ DESCRIPTION = (
'format and to/from databases' +
''
)
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
@ -24,39 +22,24 @@ DESCRIPTION = (
# -- Project information -----------------------------------------------------
project = u'pyexcel-io'
copyright = u'2015-2018 Onni Software Ltd.'
author = u'C.W.'
project = 'pyexcel-io'
copyright = '2015-2022 Onni Software Ltd.'
author = 'C.W.'
# The short X.Y version
version = u'0.5.11'
version = '0.6.6'
# The full version, including alpha/beta/rc tags
release = u'0.5.11'
release = '0.6.6'
# -- General configuration ---------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.viewcode',]
extensions = [ 'sphinx.ext.autosummary', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.viewcode', 'sphinx.ext.autodoc',]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'
# The master toctree document.
master_doc = 'index'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
@ -69,9 +52,6 @@ language = 'en'
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = None
# -- Options for HTML output -------------------------------------------------
@ -80,107 +60,16 @@ pygments_style = None
#
html_theme = 'alabaster'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself. Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}
# -- Options for HTMLHelp output ---------------------------------------------
# Output file base name for HTML help builder.
htmlhelp_basename = 'pyexcel-iodoc'
# -- Options for LaTeX output ------------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',
# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'pyexcel-io.tex', u'pyexcel-io Documentation',
u'Onni Software Ltd.', 'manual'),
]
# -- Options for manual page output ------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'pyexcel-io', u'pyexcel-io Documentation',
[author], 1)
]
# -- Options for Texinfo output ----------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'pyexcel-io', u'pyexcel-io Documentation',
author, 'pyexcel-io', 'One line description of project.',
'Miscellaneous'),
]
# -- Options for Epub output -------------------------------------------------
# Bibliographic Dublin Core info.
epub_title = project
# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#
# epub_identifier = ''
# A unique identification for the text.
#
# epub_uid = ''
# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']
# -- Extension configuration -------------------------------------------------
# -- Options for intersphinx extension ---------------------------------------
# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {'https://docs.python.org/': None}
intersphinx_mapping = {'https://docs.python.org/3/': None}
# TODO: html_theme not configurable upstream
html_theme = 'default'
@ -201,3 +90,4 @@ texinfo_documents = [
intersphinx_mapping.update({
'pyexcel': ('http://pyexcel.readthedocs.io/en/latest/', None),
})
master_doc = "index"

View File

@ -74,6 +74,8 @@ Continue from previous example::
:hide:
>>> import os
>>> if os.path.exists("your_file.csv"):
... os.unlink("your_file.csv")
>>> os.unlink("your_file__Sheet 1__0.csv")
>>> os.unlink("your_file__Sheet 2__1.csv")

View File

@ -1,14 +1,160 @@
Working with xls, xlsx, and ods formats
Extend pyexcel-io for other excel or tabular formats
================================================================================
You are welcome to extend pyexcel-io to read and write more tabular formats.
No. 1 rule, your plugin must have a prefix 'pyexcel_' in its module path.
For example, `pyexcel-xls` has 'pyexcel_xls' as its module path. Otherwise,
pyexcel-io will not load your plugin.
On github, you will find two examples in `examples` folder. This section
explains its implementations to help you write yours.
.. note::
You no longer need to do explicit imports for pyexcel-io extensions.
Instead, you install them and manage them via pip.
Work with physical file
Simple Reader for a yaml file
--------------------------------------------------------------------------------
Suppose we have a yaml file, containing a dictionary where the values are
two dimensional array. The task is to write a reader plugin to pyexcel-io so that
we can use get_data() to read yaml file out.
.. literalinclude:: ../../examples/test.yaml
:language: yaml
**Implement IReader**
First, let's implement reader interface:
1. `content_array` attribute, is expected to be a list of `NamedContent`
2. `read_sheet` function, read sheet content by its index.
3. `close` function, to clean up any file handle
.. literalinclude:: ../../examples/custom_yaml_reader.py
:language: python
:lines: 19-33
**Implement ISheet**
`YourSingleSheet` makes this simple task complex in order to showcase its inner
workings. Two abstract functions require implementation:
1. `row_iterator`: should return a row: either content array or content index as long as
`column_iterator` can use it to return the cell value.
2. `column_iterator`: should iterate cell value from the given row.
.. literalinclude:: ../../examples/custom_yaml_reader.py
:language: python
:lines: 8-16
**Plug in pyexcel-io**
The last thing is to register your new reader with pyexcel-io. `relative_plugin_class_path`
means how to refer to `YourReader` relative to the current module. `locations` means
the physical presence of the data source: "file", "memory" or "content". "file" means
files on physical disk. "memory" means a file stream. "content" means a string buffer.
`stream_type` means the type of the stream: binary for BytesIO and text for StringIO.
.. literalinclude:: ../../examples/custom_yaml_reader.py
:language: python
:lines: 36-41
Usually, this registration code is placed in the __init__.py file at the top level of your
extension source tree. You can take a look at any pyexcel plugins for reference.
**Test your reader**
Let's run the following code and see if it works.
.. literalinclude:: ../../examples/custom_yaml_reader.py
:language: python
:lines: 43-45
You would see these in standard output:
.. code-block:: bash
$ python custom_yaml_reader.py
OrderedDict([('sheet 1', [[1, 2, 3], [2, 3, 4]]), ('sheet 2', [['A', 'B', 'C']])])
A writer to write content in yaml
--------------------------------------------------------------------------------
Now for the writer, let's write a pyexcel-io writer that writes a dictionary of
two dimensional arrays back into a yaml file seen above.
**Implement IWriter**
Two abstract functions are required:
1. `create_sheet` creates a native sheet by sheet name, that understands how to code up the native sheet. Interestingly, it returns your sheet.
2. `close` function closes file handle if any.
.. literalinclude:: ../../examples/custom_yaml_writer.py
:language: python
:lines: 18-30
**Implement ISheetWriter**
It is imagined that you will have your own sheet writer. You simply need to figure
out how to write a row. Row by row write action was already written by `ISheetWriter`.
.. literalinclude:: ../../examples/custom_yaml_writer.py
:language: python
:lines: 7-14
**Plug in pyexcel-io**
Like the reader plugin, we register a writer.
.. literalinclude:: ../../examples/custom_yaml_writer.py
:language: python
:lines: 33-38
**Test It**
Let's run the following code and please examine `mytest.yaml` yourself.
.. literalinclude:: ../../examples/custom_yaml_writer.py
:language: python
:lines: 40-46
And you shall find a file named 'mytest.yaml':
.. code-block:: bash
$ cat mytest.yaml
sheet 1:
- - 1
- 3
- 4
- - 2
- 4
- 9
sheet 2:
- - B
- C
- D
Other pyexcel-io plugins
-----------------------------------------------------------------------------
Get xls support
.. code-block::
$ pip install pyexcel-xls
Here's what is needed::
>>> from pyexcel_io import save_data
@ -23,27 +169,6 @@ And you can also get the data back::
[[1, 2, 3]]
Work with memory file
-----------------------------------------------------------------------------
Here is the sample code to work with memory file::
>>> from pyexcel_io.manager import get_io
>>> io = get_io("xls")
>>> data = [[1,2,3]]
>>> save_data(io, data, "xls")
The difference is that you have mention file type if you use :meth:`pyexcel_io.save_data`
And you can also get the data back::
>>> data = get_data(io, "xls")
>>> data['pyexcel_sheet1']
[[1, 2, 3]]
The same applies to :meth:`pyexcel_io.get_data`.
Other formats
-----------------------------------------------------------------------------

View File

@ -64,48 +64,47 @@ For individual excel file formats, please install them as you wish:
.. table:: A list of file formats supported by external plugins
======================== ======================= ================= ==================
Package name Supported file formats Dependencies Python versions
======================== ======================= ================= ==================
`pyexcel-io`_ csv, csvz [#f1]_, tsv, 2.6, 2.7, 3.3,
tsvz [#f2]_ 3.4, 3.5, 3.6
pypy
`pyexcel-xls`_ xls, xlsx(read only), `xlrd`_, same as above
======================== ======================= =================
Package name Supported file formats Dependencies
======================== ======================= =================
`pyexcel-io`_ csv, csvz [#f1]_, tsv,
tsvz [#f2]_
`pyexcel-xls`_ xls, xlsx(read only), `xlrd`_,
xlsm(read only) `xlwt`_
`pyexcel-xlsx`_ xlsx `openpyxl`_ same as above
`pyexcel-ods3`_ ods `pyexcel-ezodf`_, 2.6, 2.7, 3.3, 3.4
lxml 3.5, 3.6
`pyexcel-ods`_ ods `odfpy`_ same as above
======================== ======================= ================= ==================
`pyexcel-xlsx`_ xlsx `openpyxl`_
`pyexcel-ods3`_ ods `pyexcel-ezodf`_,
lxml
`pyexcel-ods`_ ods `odfpy`_
======================== ======================= =================
.. table:: Dedicated file reader and writers
======================== ======================= ================= ==================
Package name Supported file formats Dependencies Python versions
======================== ======================= ================= ==================
`pyexcel-xlsxw`_ xlsx(write only) `XlsxWriter`_ Python 2 and 3
`pyexcel-xlsxr`_ xlsx(read only) lxml same as above
`pyexcel-odsr`_ read only for ods, fods lxml same as above
`pyexcel-htmlr`_ html(read only) lxml,html5lib same as above
======================== ======================= ================= ==================
======================== ======================= =================
Package name Supported file formats Dependencies
======================== ======================= =================
`pyexcel-xlsxw`_ xlsx(write only) `XlsxWriter`_
`pyexcel-libxlsxw`_ xlsx(write only) `libxlsxwriter`_
`pyexcel-xlsxr`_ xlsx(read only) lxml
`pyexcel-xlsbr`_ xlsb(read only) pyxlsb
`pyexcel-odsr`_ read only for ods, fods lxml
`pyexcel-odsw`_ write only for ods loxun
`pyexcel-htmlr`_ html(read only) lxml,html5lib
`pyexcel-pdfr`_ pdf(read only) camelot
======================== ======================= =================
.. _pyexcel-io: https://github.com/pyexcel/pyexcel-io
.. _pyexcel-xls: https://github.com/pyexcel/pyexcel-xls
.. _pyexcel-xlsx: https://github.com/pyexcel/pyexcel-xlsx
.. _pyexcel-ods: https://github.com/pyexcel/pyexcel-ods
.. _pyexcel-ods3: https://github.com/pyexcel/pyexcel-ods3
.. _pyexcel-odsr: https://github.com/pyexcel/pyexcel-odsr
.. _pyexcel-xlsxw: https://github.com/pyexcel/pyexcel-xlsxw
.. _pyexcel-xlsxr: https://github.com/pyexcel/pyexcel-xlsxr
.. _pyexcel-htmlr: https://github.com/pyexcel/pyexcel-htmlr
Plugin shopping guide
------------------------
.. _xlrd: https://github.com/python-excel/xlrd
.. _xlwt: https://github.com/python-excel/xlwt
.. _openpyxl: https://bitbucket.org/openpyxl/openpyxl
.. _XlsxWriter: https://github.com/jmcnamara/XlsxWriter
.. _pyexcel-ezodf: https://github.com/pyexcel/pyexcel-ezodf
.. _odfpy: https://github.com/eea/odfpy
Since 2020, all pyexcel-io plugins have dropped the support for python versions
which are lower than 3.6. If you want to use any of those Python versions, please use pyexcel-io
and its plugins versions that are lower than 0.6.0.
Except csv files, xls, xlsx and ods files are a zip of a folder containing a lot of
xml files
The dedicated readers for excel files can stream read
In order to manage the list of plugins installed, you need to use pip to add or remove
@ -115,6 +114,32 @@ in your environment, you need to tell pyexcel which plugin to use per function c
For example, pyexcel-ods and pyexcel-odsr, and you want to get_array to use pyexcel-odsr.
You need to append get_array(..., library='pyexcel-odsr').
.. _pyexcel-io: https://github.com/pyexcel/pyexcel-io
.. _pyexcel-xls: https://github.com/pyexcel/pyexcel-xls
.. _pyexcel-xlsx: https://github.com/pyexcel/pyexcel-xlsx
.. _pyexcel-ods: https://github.com/pyexcel/pyexcel-ods
.. _pyexcel-ods3: https://github.com/pyexcel/pyexcel-ods3
.. _pyexcel-odsr: https://github.com/pyexcel/pyexcel-odsr
.. _pyexcel-odsw: https://github.com/pyexcel/pyexcel-odsw
.. _pyexcel-pdfr: https://github.com/pyexcel/pyexcel-pdfr
.. _pyexcel-xlsxw: https://github.com/pyexcel/pyexcel-xlsxw
.. _pyexcel-libxlsxw: https://github.com/pyexcel/pyexcel-libxlsxw
.. _pyexcel-xlsxr: https://github.com/pyexcel/pyexcel-xlsxr
.. _pyexcel-xlsbr: https://github.com/pyexcel/pyexcel-xlsbr
.. _pyexcel-htmlr: https://github.com/pyexcel/pyexcel-htmlr
.. _xlrd: https://github.com/python-excel/xlrd
.. _xlwt: https://github.com/python-excel/xlwt
.. _openpyxl: https://bitbucket.org/openpyxl/openpyxl
.. _XlsxWriter: https://github.com/jmcnamara/XlsxWriter
.. _pyexcel-ezodf: https://github.com/pyexcel/pyexcel-ezodf
.. _odfpy: https://github.com/eea/odfpy
.. _libxlsxwriter: http://libxlsxwriter.github.io/getting_started.html
.. rubric:: Footnotes
.. [#f1] zipped csv file
@ -138,6 +163,7 @@ get_data(.., library='pyexcel-ods')
============= ======= ======== ======= ======== ======== ========
`pyexcel-io`_ `xls`_ `xlsx`_ `ods`_ `ods3`_ `odsr`_ `xlsxw`_
============= ======= ======== ======= ======== ======== ========
0.6.0+ 0.5.0+ 0.5.0+ 0.5.4 0.5.3 0.5.0+ 0.5.0+
0.5.10+ 0.5.0+ 0.5.0+ 0.5.4 0.5.3 0.5.0+ 0.5.0+
0.5.1+ 0.5.0+ 0.5.0+ 0.5.0+ 0.5.0+ 0.5.0+ 0.5.0+
0.4.x 0.4.x 0.4.x 0.4.x 0.4.x 0.4.x 0.4.x
@ -188,6 +214,7 @@ API
.. autosummary::
:toctree: api/
iget_data
get_data
save_data

View File

@ -60,3 +60,9 @@ Obvious, you could do both at the same time:
The pagination support is available across all pyexcel-io plugins.
.. testcode::
:hide:
>>> import os
>>> if os.path.exists("your_file.csv"):
... os.unlink("your_file.csv")

View File

@ -153,7 +153,7 @@ Here is an example to write a sentence of "Shui Dial Getou"[#f2] into a csv file
.. code-block:: python
>>> content = [[u'人有悲歡離合', u'月有陰晴圓缺']]
>>> content = [['löyly', 'löyly']]
>>> test_file = "test-utf8-BOM.csv"
>>> save_data(test_file, content, encoding="utf-8-sig", lineterminator="\n")
@ -172,4 +172,5 @@ When you read it back you will have to specify encoding too.
>>> import os
>>> os.unlink("your_file.csv")
>>> os.unlink("test-utf16-encoding.csv")
>>> os.unlink(test_file)

View File

@ -1,6 +1,27 @@
Packaging with PyInstaller
================================================================================
With pyexcel-io v0.6.0, the way to package it has been changed because of
the plugin interface update.
Built-in plugins for pyexcel-io
---------------------------------
In order to package all built-in plugins of pyexcel-io, you need to specify::
--hidden-import pyexcel_io.readers.csv_in_file
--hidden-import pyexcel_io.readers.csv_in_memory
--hidden-import pyexcel_io.readers.csv_content
--hidden-import pyexcel_io.readers.csvz
--hidden-import pyexcel_io.writers.csv_in_file
--hidden-import pyexcel_io.writers.csv_in_memory
--hidden-import pyexcel_io.writers.csvz_writer
--hidden-import pyexcel_io.database.importers.django
--hidden-import pyexcel_io.database.importers.sqlalchemy
--hidden-import pyexcel_io.database.exporters.django
--hidden-import pyexcel_io.database.exporters.sqlalchemy
With pyexcel-io v0.4.0, the way to package it has been changed because it
uses lml for all plugins.
@ -14,9 +35,9 @@ In order to package every built-in plugins of pyexcel-io, you need to specify::
--hidden-import pyexcel_io.readers.tsv
--hidden-import pyexcel_io.readers.tsvz
--hidden-import pyexcel_io.writers.csvw
--hidden-import pyexcel_io.readers.csvz
--hidden-import pyexcel_io.readers.tsv
--hidden-import pyexcel_io.readers.tsvz
--hidden-import pyexcel_io.writers.csvz
--hidden-import pyexcel_io.writers.tsv
--hidden-import pyexcel_io.writers.tsvz
--hidden-import pyexcel_io.database.importers.django
--hidden-import pyexcel_io.database.importers.sqlalchemy
--hidden-import pyexcel_io.database.exporters.django

View File

@ -1,5 +1,4 @@
Rendering(Formatting) the data
================================================================================
You might want to do custom rendering on your data obtained. `row_renderer` was
@ -46,3 +45,10 @@ And you may want use row_renderer to customize it to string:
>>> data = get_data("your_file.csv", row_renderer=my_renderer)
>>> data['your_file.csv']
[['1', '21', '31'], ['2', '22', '32'], ['3', '23', '33']]
.. testcode::
:hide:
>>> import os
>>> if os.path.exists("your_file.csv"):
... os.unlink("your_file.csv")

View File

@ -0,0 +1,45 @@
import yaml
from pyexcel_io import get_data
from pyexcel_io.sheet import NamedContent
from pyexcel_io.plugins import IOPluginInfoChainV2
from pyexcel_io.plugin_api import ISheet, IReader
class YourSingleSheet(ISheet):
    """Expose an in-memory two dimensional array through the sheet interface."""

    def __init__(self, your_native_sheet):
        # The rows are stored as handed in; no copy is made.
        self.two_dimensional_array = your_native_sheet

    def row_iterator(self):
        """Yield every row of the wrapped array, in order."""
        for one_row in self.two_dimensional_array:
            yield one_row

    def column_iterator(self, row):
        """Yield every cell of ``row``, in order."""
        for cell in row:
            yield cell
class YourReader(IReader):
    """Read a yaml file whose top-level mapping goes from sheet name to rows."""

    def __init__(self, file_name, file_type, **keywords):
        """Open ``file_name`` and parse it into one NamedContent per sheet.

        ``file_type`` and ``keywords`` are accepted but not used here.
        The file handle stays open until :meth:`close` is called.
        """
        self.file_handle = open(file_name, "r")
        try:
            # safe_load builds only plain yaml types, so a crafted file
            # cannot instantiate arbitrary Python objects.  A bare
            # yaml.load(stream) without a Loader is also rejected by
            # recent PyYAML releases.
            self.native_book = yaml.safe_load(self.file_handle)
            self.content_array = [
                NamedContent(key, values)
                for key, values in self.native_book.items()
            ]
        except Exception:
            # close() will never be reached by the caller when the
            # constructor fails, so release the handle here.
            self.file_handle.close()
            raise

    def read_sheet(self, sheet_index):
        """Wrap the rows of the sheet at ``sheet_index`` in YourSingleSheet."""
        two_dimensional_array = self.content_array[sheet_index].payload
        return YourSingleSheet(two_dimensional_array)

    def close(self):
        """Close the file opened by the constructor."""
        self.file_handle.close()
# Register YourReader (looked up by name in this module) as the reader
# for the "yaml" file type when the source location is a file.
IOPluginInfoChainV2(__name__).add_a_reader(
    file_types=["yaml"],
    locations=["file"],
    stream_type="text",
    relative_plugin_class_path="YourReader",
)


if __name__ == "__main__":
    # Demo: read test.yaml through the reader registered above.
    data = get_data("test.yaml")
    print(data)

View File

@ -0,0 +1,46 @@
import yaml
from pyexcel_io import save_data
from pyexcel_io.plugins import IOPluginInfoChainV2
from pyexcel_io.plugin_api import IWriter, ISheetWriter
class MySheetWriter(ISheetWriter):
    """Collect the rows of one sheet into a caller-supplied container."""

    def __init__(self, sheet_reference):
        # The container is shared with whoever created this writer.
        self.native_sheet = sheet_reference

    def close(self):
        """Nothing to release; rows were appended as they arrived."""

    def write_row(self, data_row):
        """Append ``data_row`` to the shared container."""
        target = self.native_sheet
        target.append(data_row)
class MyWriter(IWriter):
    """Accumulate sheets in memory and dump them as yaml on close."""

    def __init__(self, file_name, file_type, **keywords):
        # file_type and keywords are accepted but not used here
        self.content = {}
        self.file_name = file_name

    def create_sheet(self, name):
        """Start an empty sheet called ``name`` and return its row writer."""
        rows = []
        self.content[name] = rows
        return MySheetWriter(rows)

    def close(self):
        """Write every collected sheet to ``self.file_name`` in block style."""
        with open(self.file_name, "w") as output:
            serialized = yaml.dump(self.content, default_flow_style=False)
            output.write(serialized)
# Register MyWriter (looked up by name in this module) as the writer
# for the "yaml" file type when the target location is a file.
IOPluginInfoChainV2(__name__).add_a_writer(
    file_types=["yaml"],
    locations=["file"],
    stream_type="text",
    relative_plugin_class_path="MyWriter",
)


if __name__ == "__main__":
    # Demo: save two small sheets through the writer registered above.
    data_dict = {
        "sheet 1": [
            [1, 3, 4],
            [2, 4, 9],
        ],
        "sheet 2": [
            ["B", "C", "D"],
        ],
    }
    save_data("mytest.yaml", data_dict)

11
examples/test.yaml Normal file
View File

@ -0,0 +1,11 @@
sheet 1:
- - 1
- 2
- 3
- - 2
- 3
- 4
sheet 2:
- - A
- B
- C

3
format.sh Normal file
View File

@ -0,0 +1,3 @@
# Sort imports in every *.py file found under pyexcel_io and tests.
isort $(find pyexcel_io -name "*.py"|xargs echo) $(find tests -name "*.py"|xargs echo)
# Reformat both trees with black using a line length of 79.
black -l 79 pyexcel_io
black -l 79 tests

2
lint.sh Normal file
View File

@ -0,0 +1,2 @@
# Install the linter, then lint the tree; the docs check only runs
# when flake8 succeeds (the two commands are joined with &&).
pip install flake8
flake8 --exclude=.moban.d,docs,setup.py --builtins=unicode,xrange,long . && python setup.py checkdocs

View File

@ -1,19 +1,27 @@
overrides: "pyexcel.yaml"
name: "pyexcel-io"
project: "pyexcel-io"
name: pyexcel-io
nick_name: io
version: 0.5.13
current_version: 0.5.13
release: 0.5.13
version: 0.6.6
current_version: 0.6.6
release: 0.6.6
copyright_year: 2015-2022
moban_command: false
is_on_conda: true
dependencies:
- ordereddict;python_version<"2.7"
- lml>=0.0.4
test_dependencies:
- pyexcel
- pyexcel-xls==0.5.9
- SQLAlchemy
- pyexcel-xlsxw
extra_dependencies:
- xls:
- pyexcel-xls>=0.5.0
- pyexcel-xls>=0.6.0
- xlsx:
- pyexcel-xlsx>=0.5.0
- pyexcel-xlsx>=0.6.0
- ods:
- pyexcel-ods3>=0.5.0
- pyexcel-ods3>=0.6.0
keywords:
- API
- tsv
@ -22,4 +30,12 @@ keywords:
- csvz
- django
- sqlalchemy
sphinx_extensions:
- sphinx.ext.autosummary
- sphinx.ext.doctest
- sphinx.ext.intersphinx
- sphinx.ext.viewcode
- sphinx.ext.autodoc
description: A python library to read and write structured data in csv, zipped csv format and to/from databases
python_requires: ">=3.6"
min_python_version: "3.6"

View File

@ -4,17 +4,18 @@
Uniform interface for reading/writing different excel file formats
:copyright: (c) 2014-2017 by Onni Software Ltd.
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
import logging
import pyexcel_io.plugins as plugins
from .io import get_data, iget_data, save_data # noqa
from ._compact import NullHandler
logging.getLogger(__name__).addHandler(NullHandler()) # noqa
from .io import get_data, iget_data, save_data # noqa
import pyexcel_io.plugins as plugins
BLACK_LIST = [__name__, "pyexcel_webio", "pyexcel_text"]
WHITE_LIST = [
@ -25,7 +26,5 @@ WHITE_LIST = [
PREFIX_PATTERN = "^pyexcel_.*$"
plugins.load_plugins(
PREFIX_PATTERN,
__path__, # noqa: F821
BLACK_LIST,
WHITE_LIST)
PREFIX_PATTERN, __path__, BLACK_LIST, WHITE_LIST # noqa: F821
)

View File

@ -4,29 +4,13 @@
Compatibles
:copyright: (c) 2014-2017 by Onni Software Ltd.
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
# flake8: noqa
# pylint: disable=import-error
# pylint: disable=invalid-name
# pylint: disable=too-few-public-methods
# pylint: disable=ungrouped-imports
# pylint: disable=redefined-variable-type
import sys
import types
import logging
PY2 = sys.version_info[0] == 2
PY3_ABOVE = sys.version_info[0] >= 3
PY26 = PY2 and sys.version_info[1] < 7
PY27 = PY2 and sys.version_info[1] == 7
PY27_ABOVE = PY27 or PY3_ABOVE
if PY26:
from ordereddict import OrderedDict
else:
from collections import OrderedDict
from io import BytesIO, StringIO # noqa: F401
from collections import OrderedDict # noqa: F401
try:
from logging import NullHandler
@ -37,28 +21,13 @@ except ImportError:
pass
if PY2:
from cStringIO import StringIO
from cStringIO import StringIO as BytesIO
text_type = unicode
irange = xrange
class Iterator(object):
def next(self):
return type(self).__next__(self)
else:
from io import StringIO, BytesIO
text_type = str
Iterator = object
irange = range
# Aliases for the built-in str and range types.
text_type = str
irange = range
# sys.version is a string such as "3.8.10 (...)", so indexing it yields a
# one-character str that can never equal the int 2.  sys.version_info
# holds integers and gives the comparison its intended meaning.
PY2 = sys.version_info[0] == 2
def isstream(instance):
""" check if a instance is a stream """
"""check if a instance is a stream"""
try:
import mmap
@ -72,11 +41,4 @@ def isstream(instance):
def is_string(atype):
"""find out if a type is str or not"""
if atype == str:
return True
elif PY2:
if atype == unicode:
return True
return False
return atype == str

View File

@ -4,13 +4,22 @@
The io interface to file extensions
:copyright: (c) 2014-2017 by Onni Software Ltd.
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
import warnings
import pyexcel_io.manager as manager
from pyexcel_io._compact import OrderedDict, isstream, PY2
from pyexcel_io._compact import OrderedDict, isstream
from .constants import MESSAGE_ERROR_03, MESSAGE_WRONG_IO_INSTANCE
DEPRECATED_SINCE_0_6_0 = (
"Deprecated since v0.6.0! "
+ "Although backward compatibility is preserved, "
+ "it is recommended to upgrade to get new features."
)
class RWInterface(object):
"""
@ -20,6 +29,7 @@ class RWInterface(object):
stream_type = None
def __init__(self):
warnings.warn(DEPRECATED_SINCE_0_6_0)
self._file_type = None
def open(self, file_name, **keywords):
@ -85,26 +95,15 @@ class BookReader(RWInterface):
keywords are passed on to individual readers
"""
if isstream(file_stream):
if PY2:
if hasattr(file_stream, "seek"):
file_stream.seek(0)
else:
# python 2
# Hei zipfile in odfpy would do a seek
# but stream from urlib cannot do seek
file_stream = _convert_content_to_stream(
file_stream.read(), self._file_type
)
else:
from io import UnsupportedOperation
from io import UnsupportedOperation
try:
file_stream.seek(0)
except UnsupportedOperation:
# python 3
file_stream = _convert_content_to_stream(
file_stream.read(), self._file_type
)
try:
file_stream.seek(0)
except UnsupportedOperation:
# python 3
file_stream = _convert_content_to_stream(
file_stream.read(), self._file_type
)
self._file_stream = file_stream
self._keywords = keywords
@ -231,6 +230,17 @@ class BookWriter(RWInterface):
def _convert_content_to_stream(file_content, file_type):
    """Load ``file_content`` into a fresh stream for ``file_type``.

    The stream comes from ``manager.get_io(file_type)``.  When
    ``manager.get_io_type(file_type)`` reports "bytes" or "string" and the
    content is of the other kind, it is converted with utf-8 first.  The
    stream is rewound to position 0 before it is returned.
    """
    stream = manager.get_io(file_type)
    expected_kind = manager.get_io_type(file_type)
    content_is_bytes = isinstance(file_content, bytes)

    if expected_kind == "bytes" and not content_is_bytes:
        # text handed in for a binary format
        file_content = file_content.encode("utf-8")
    elif expected_kind == "string" and content_is_bytes:
        # bytes handed in for a text format
        file_content = file_content.decode("utf-8")

    stream.write(file_content)
    stream.seek(0)
    return stream

View File

@ -4,7 +4,7 @@
Constants appeared in pyexcel
:copyright: (c) 2014-2017 by Onni Software Ltd.
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License
"""
# flake8: noqa
@ -16,6 +16,7 @@ MESSAGE_INVALID_PARAMETERS = "Invalid parameters"
MESSAGE_ERROR_02 = "No content, file name. Nothing is given"
MESSAGE_ERROR_03 = "cannot handle unknown content"
MESSAGE_WRONG_IO_INSTANCE = "Wrong io instance is passed for your file format."
MESSAGE_FILE_NAME_SHOULD_BE_STRING = "file_name should be a string"
MESSAGE_CANNOT_WRITE_STREAM_FORMATTER = (
"Cannot write content of file type %s to stream"
)
@ -31,6 +32,8 @@ MESSAGE_CANNOT_READ_FILE_TYPE_FORMATTER = (
MESSAGE_LOADING_FORMATTER = (
"The plugin for file type %s is not installed. Please install %s"
)
MESSAGE_NOT_FILE_FORMATTER = "%s is not a file"
MESSAGE_FILE_DOES_NOT_EXIST = "%s does not exist"
MESSAGE_EMPTY_ARRAY = "One empty row is found"
MESSAGE_IGNORE_ROW = "One row is ignored"
MESSAGE_DB_EXCEPTION = """
@ -44,8 +47,12 @@ FILE_FORMAT_ODS = "ods"
FILE_FORMAT_XLS = "xls"
FILE_FORMAT_XLSX = "xlsx"
FILE_FORMAT_XLSM = "xlsm"
FILE_FORMAT_XLSB = "xlsb"
FILE_FORMAT_HTML = "html"
FILE_FORMAT_PDF = "pdf"
DB_SQL = "sql"
DB_DJANGO = "django"
DB_QUERYSET = "queryset"
KEYWORD_TSV_DIALECT = "excel-tab"
KEYWORD_LINE_TERMINATOR = "lineterminator"

View File

@ -4,23 +4,30 @@
database data importer and exporter
:copyright: (c) 2014-2017 by Onni Software Ltd.
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
from pyexcel_io.plugins import IOPluginInfoChain
from pyexcel_io.constants import DB_DJANGO, DB_SQL
from pyexcel_io.plugins import IOPluginInfoChainV2
from pyexcel_io.constants import DB_SQL, DB_DJANGO, DB_QUERYSET
IOPluginInfoChain(__name__).add_a_reader(
IOPluginInfoChainV2(__name__).add_a_reader(
relative_plugin_class_path="exporters.queryset.QueryReader",
locations=["file", "memory", "content"],
file_types=[DB_QUERYSET],
).add_a_reader(
relative_plugin_class_path="exporters.django.DjangoBookReader",
locations=["file", "memory", "content"],
file_types=[DB_DJANGO],
).add_a_writer(
relative_plugin_class_path="importers.django.DjangoBookWriter",
locations=["file", "content", "memory"],
file_types=[DB_DJANGO],
).add_a_reader(
relative_plugin_class_path="exporters.sqlalchemy.SQLBookReader",
locations=["file", "memory", "content"],
file_types=[DB_SQL],
).add_a_writer(
relative_plugin_class_path="importers.django.DjangoBookWriter",
file_types=[DB_DJANGO],
).add_a_writer(
relative_plugin_class_path="importers.sqlalchemy.SQLBookWriter",
locations=["file", "content", "memory"],
file_types=[DB_SQL],
)

View File

@ -4,31 +4,13 @@
Common classes shared among database importers and exporters
:copyright: (c) 2014-2017 by Onni Software Ltd.
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
from pyexcel_io.book import BookReader
class DbExporter(BookReader):
""" Transcode the book reader interface to db interface """
def open(self, file_name, **keywords):
self.export_tables(self, file_name, **keywords)
def open_stream(self, file_stream, **keywords):
self.export_tables(self, file_stream, **keywords)
def open_content(self, file_content, **keywords):
self.export_tables(file_content, **keywords)
def export_tables(self, exporter, **keywords):
""" read database tables """
raise NotImplementedError("Please implement this method")
class DjangoModelExportAdapter(object):
""" django export parameter holder """
"""django export parameter holder"""
def __init__(self, model, export_columns=None):
self.model = model
@ -36,19 +18,19 @@ class DjangoModelExportAdapter(object):
@property
def name(self):
""" get database table name """
"""get database table name"""
return self.get_name()
def get_name(self):
""" get database table name """
"""get database table name"""
return self.model._meta.model_name
class DjangoModelImportAdapter(DjangoModelExportAdapter):
""" parameter holder for django data import """
"""parameter holder for django data import"""
class InOutParameter(object):
""" local class to manipulate variable io """
"""local class to manipulate variable io"""
def __init__(self):
self.output = None
@ -56,91 +38,96 @@ class DjangoModelImportAdapter(DjangoModelExportAdapter):
def __init__(self, model):
DjangoModelExportAdapter.__init__(self, model)
self.__column_names = self.InOutParameter()
self.__column_name_mapping_dict = self.InOutParameter()
self.__row_initializer = self.InOutParameter()
self._column_names = self.InOutParameter()
self._column_name_mapping_dict = self.InOutParameter()
self._row_initializer = self.InOutParameter()
self._process_parameters()
@property
def row_initializer(self):
""" contructor for a database table entry """
return self.__row_initializer.output
"""contructor for a database table entry"""
return self._row_initializer.output
@property
def column_names(self):
""" the desginated database column names """
return self.__column_names.output
"""the desginated database column names"""
return self._column_names.output
@property
def column_name_mapping_dict(self):
""" if not the same, a mapping dictionary is looked up"""
return self.__column_name_mapping_dict.output
"""if not the same, a mapping dictionary is looked up"""
return self._column_name_mapping_dict.output
@row_initializer.setter
def row_initializer(self, a_function):
""" set the contructor """
self.__row_initializer.input = a_function
"""set the contructor"""
self._row_initializer.input = a_function
self._process_parameters()
@column_names.setter
def column_names(self, column_names):
""" set the column names """
self.__column_names.input = column_names
"""set the column names"""
self._column_names.input = column_names
self._process_parameters()
@column_name_mapping_dict.setter
def column_name_mapping_dict(self, mapping_dict):
""" set the mapping dict """
self.__column_name_mapping_dict.input = mapping_dict
"""set the mapping dict"""
self._column_name_mapping_dict.input = mapping_dict
self._process_parameters()
def _process_parameters(self):
if self.__row_initializer.input is None:
self.__row_initializer.output = None
if self._row_initializer.input is None:
self._row_initializer.output = None
else:
self.__row_initializer.output = self.__row_initializer.input
if isinstance(self.__column_name_mapping_dict.input, list):
self.__column_names.output = self.__column_name_mapping_dict.input
self.__column_name_mapping_dict.output = None
elif isinstance(self.__column_name_mapping_dict.input, dict):
if self.__column_names.input:
self.__column_names.output = [
self.__column_name_mapping_dict.input[name]
for name in self.__column_names.input
]
self.__column_name_mapping_dict.output = None
if self.__column_names.output is None:
self.__column_names.output = self.__column_names.input
self._row_initializer.output = self._row_initializer.input
if isinstance(self._column_name_mapping_dict.input, list):
self._column_names.output = self._column_name_mapping_dict.input
self._column_name_mapping_dict.output = None
elif isinstance(self._column_name_mapping_dict.input, dict):
if self._column_names.input:
self._column_names.output = []
indices = []
for index, name in enumerate(self._column_names.input):
if name in self._column_name_mapping_dict.input:
self._column_names.output.append(
self._column_name_mapping_dict.input[name]
)
indices.append(index)
self._column_name_mapping_dict.output = indices
if self._column_names.output is None:
self._column_names.output = self._column_names.input
class DjangoModelExporter(object):
""" public interface for django model export """
"""public interface for django model export"""
def __init__(self):
self.adapters = []
def append(self, import_adapter):
""" store model parameter for more than one model """
"""store model parameter for more than one model"""
self.adapters.append(import_adapter)
class DjangoModelImporter(object):
""" public interface for django model import """
"""public interface for django model import"""
def __init__(self):
self.__adapters = {}
self._adapters = {}
def append(self, import_adapter):
""" store model parameter for more than one model """
self.__adapters[import_adapter.get_name()] = import_adapter
"""store model parameter for more than one model"""
self._adapters[import_adapter.get_name()] = import_adapter
def get(self, name):
""" get a parameter out """
return self.__adapters.get(name, None)
"""get a parameter out"""
return self._adapters.get(name, None)
class SQLTableExportAdapter(DjangoModelExportAdapter):
""" parameter holder for sql table data export """
"""parameter holder for sql table data export"""
def __init__(self, model, export_columns=None):
DjangoModelExportAdapter.__init__(self, model, export_columns)
@ -151,7 +138,7 @@ class SQLTableExportAdapter(DjangoModelExportAdapter):
class SQLTableImportAdapter(DjangoModelImportAdapter):
""" parameter holder for sqlalchemy table import """
"""parameter holder for sqlalchemy table import"""
def __init__(self, model):
DjangoModelImportAdapter.__init__(self, model)
@ -162,7 +149,7 @@ class SQLTableImportAdapter(DjangoModelImportAdapter):
class SQLTableExporter(DjangoModelExporter):
""" public interface for sql table export """
"""public interface for sql table export"""
def __init__(self, session):
DjangoModelExporter.__init__(self)
@ -170,7 +157,7 @@ class SQLTableExporter(DjangoModelExporter):
class SQLTableImporter(DjangoModelImporter):
""" public interface to do data import via sqlalchemy """
"""public interface to do data import via sqlalchemy"""
def __init__(self, session):
DjangoModelImporter.__init__(self)

View File

@ -4,16 +4,15 @@
The lower level handler for django import and export
:copyright: (c) 2014-2017 by Onni Software Ltd.
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
from pyexcel_io.database.common import DbExporter
from pyexcel_io.plugin_api import IReader
from pyexcel_io.database.querysets import QuerysetsReader
class DjangoModelReader(QuerysetsReader):
"""Read from django model
"""
"""Read from django model"""
def __init__(self, model, export_columns=None, **keywords):
self.__model = model
@ -28,22 +27,20 @@ class DjangoModelReader(QuerysetsReader):
)
class DjangoBookReader(DbExporter):
""" read django models """
class DjangoBookReader(IReader):
"""read django models"""
def __init__(self):
DbExporter.__init__(self)
self.exporter = None
def __init__(self, exporter, _, **keywords):
self.exporter = exporter
self.keywords = keywords
self.content_array = self.exporter.adapters
def export_tables(self, file_content, **keywords):
self.exporter = file_content
self._load_from_django_models()
def read_sheet(self, native_sheet):
def read_sheet(self, native_sheet_index):
native_sheet = self.content_array[native_sheet_index]
reader = DjangoModelReader(
native_sheet.model, native_sheet.export_columns
native_sheet.model, export_columns=native_sheet.export_columns
)
return reader.to_array()
return reader
def _load_from_django_models(self):
self._native_book = self.exporter.adapters
def close(self):
pass

View File

@ -0,0 +1,20 @@
from pyexcel_io.plugin_api import IReader
from pyexcel_io.database.querysets import QuerysetsReader
class QueryReader(IReader):
    """Present one query set as a book holding a single sheet."""

    def __init__(self, query_sets, _, column_names=None, **keywords):
        # The second positional argument is accepted but ignored.
        self.query_sets = query_sets
        self.column_names = column_names
        self.keywords = keywords
        only_sheet = QuerysetsReader(query_sets, column_names, **keywords)
        self.content_array = [only_sheet]

    def read_sheet(self, index):
        """Return the sheet stored at ``index`` of ``content_array``."""
        return self.content_array[index]

    def close(self):
        """Nothing to release."""

View File

@ -4,16 +4,15 @@
The lower level handler for database import and export
:copyright: (c) 2014-2017 by Onni Software Ltd.
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
from pyexcel_io.database.common import DbExporter
from pyexcel_io.plugin_api import IReader
from pyexcel_io.database.querysets import QuerysetsReader
class SQLTableReader(QuerysetsReader):
"""Read a table
"""
"""Read a table"""
def __init__(self, session, table, export_columns=None, **keywords):
everything = session.query(table).all()
@ -32,24 +31,22 @@ class SQLTableReader(QuerysetsReader):
QuerysetsReader.__init__(self, everything, column_names, **keywords)
class SQLBookReader(DbExporter):
""" read a table via sqlalchemy """
class SQLBookReader(IReader):
"""read a table via sqlalchemy"""
def __init__(self):
DbExporter.__init__(self)
self.__exporter = None
def __init__(self, exporter, _, **keywords):
self.__exporter = exporter
self.content_array = self.__exporter.adapters
self.keywords = keywords
def export_tables(self, file_content, **keywords):
self.__exporter = file_content
self._load_from_tables()
def read_sheet(self, native_sheet):
def read_sheet(self, native_sheet_index):
native_sheet = self.content_array[native_sheet_index]
reader = SQLTableReader(
self.__exporter.session,
native_sheet.table,
native_sheet.export_columns,
)
return reader.to_array()
return reader
def _load_from_tables(self):
self._native_book = self.__exporter.adapters
def close(self):
pass

View File

@ -4,77 +4,75 @@
The lower level handler for django import and export
:copyright: (c) 2014-2017 by Onni Software Ltd.
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
import logging
from pyexcel_io.book import BookWriter
from pyexcel_io.sheet import SheetWriter
from pyexcel_io.utils import is_empty_array, swap_empty_string_for_none
import pyexcel_io.constants as constants
from pyexcel_io.utils import is_empty_array, swap_empty_string_for_none
from pyexcel_io.plugin_api import IWriter, ISheetWriter
log = logging.getLogger(__name__)
class DjangoModelWriter(SheetWriter):
""" import data into a django model """
class DjangoModelWriter(ISheetWriter):
"""import data into a django model"""
def __init__(self, importer, adapter, batch_size=None, bulk_save=True):
SheetWriter.__init__(self, importer, adapter, adapter.name)
self.__batch_size = batch_size
self.__model = adapter.model
self.__column_names = adapter.column_names
self.__mapdict = adapter.column_name_mapping_dict
self.__initializer = adapter.row_initializer
self.__objs = []
self.__bulk_save = bulk_save
self.batch_size = batch_size
self.model = adapter.model
self.column_names = adapter.column_names
self.mapdict = adapter.column_name_mapping_dict
self.initializer = adapter.row_initializer
self.objs = []
self.bulk_save = bulk_save
self.adapter = adapter
def write_row(self, array):
if is_empty_array(array):
print(constants.MESSAGE_EMPTY_ARRAY)
log.warning(constants.MESSAGE_EMPTY_ARRAY)
else:
new_array = swap_empty_string_for_none(array)
if self.mapdict:
another_new_array = []
for index, element in enumerate(new_array):
if index in self.mapdict:
another_new_array.append(element)
new_array = another_new_array
model_to_be_created = new_array
if self.__initializer is not None:
model_to_be_created = self.__initializer(new_array)
if self.initializer is not None:
model_to_be_created = self.initializer(new_array)
if model_to_be_created:
self.__objs.append(
self.__model(
**dict(zip(self.__column_names, model_to_be_created))
)
)
row = dict(zip(self.column_names, model_to_be_created))
self.objs.append(self.model(**row))
# else
# skip the row
def close(self):
if self.__bulk_save:
self.__model.objects.bulk_create(
self.__objs, batch_size=self.__batch_size
if self.bulk_save:
self.model.objects.bulk_create(
self.objs, batch_size=self.batch_size
)
else:
for an_object in self.__objs:
for an_object in self.objs:
an_object.save()
class DjangoBookWriter(BookWriter):
""" write data into django models """
class DjangoBookWriter(IWriter):
"""write data into django models"""
def __init__(self):
BookWriter.__init__(self)
self.__importer = None
def open_content(self, file_content, **keywords):
self.__importer = file_content
def __init__(self, exporter, _, **keywords):
self.importer = exporter
self._keywords = keywords
def create_sheet(self, sheet_name):
sheet_writer = None
model = self.__importer.get(sheet_name)
model = self.importer.get(sheet_name)
if model:
sheet_writer = DjangoModelWriter(
self.__importer,
self.importer,
model,
batch_size=self._keywords.get("batch_size", None),
bulk_save=self._keywords.get("bulk_save", True),
@ -86,3 +84,6 @@ class DjangoBookWriter(BookWriter):
)
return sheet_writer
def close(self):
pass

View File

@ -4,13 +4,16 @@
The lower level handler for database import and export
:copyright: (c) 2014-2017 by Onni Software Ltd.
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
from pyexcel_io.book import BookWriter
from pyexcel_io.sheet import SheetWriter
from pyexcel_io.utils import is_empty_array, swap_empty_string_for_none
import logging
import pyexcel_io.constants as constants
from pyexcel_io.utils import is_empty_array, swap_empty_string_for_none
from pyexcel_io.plugin_api import IWriter, ISheetWriter
LOG = logging.getLogger(__name__)
class PyexcelSQLSkipRowException(Exception):
@ -22,66 +25,62 @@ class PyexcelSQLSkipRowException(Exception):
pass
class SQLTableWriter(SheetWriter):
"""Write to a table
"""
class SQLTableWriter(ISheetWriter):
"""Write to a table"""
def __init__(
self, importer, adapter, auto_commit=True, bulk_size=1000, **keywords
):
SheetWriter.__init__(
self, importer, adapter, adapter.get_name(), **keywords
)
self.__auto_commit = auto_commit
self.__count = 0
self.__bulk_size = bulk_size
self.adapter = adapter
self.importer = importer
def write_row(self, array):
if is_empty_array(array):
print(constants.MESSAGE_EMPTY_ARRAY)
LOG.warning(constants.MESSAGE_EMPTY_ARRAY)
else:
new_array = swap_empty_string_for_none(array)
try:
self._write_row(new_array)
except PyexcelSQLSkipRowException:
print(constants.MESSAGE_IGNORE_ROW)
print(new_array)
LOG.info(constants.MESSAGE_IGNORE_ROW)
LOG.info(new_array)
def _write_row(self, array):
row = dict(zip(self._native_sheet.column_names, array))
new_array = array
if self.adapter.column_name_mapping_dict:
another_new_array = []
for index, element in enumerate(new_array):
if index in self.adapter.column_name_mapping_dict:
another_new_array.append(element)
new_array = another_new_array
row = dict(zip(self.adapter.column_names, new_array))
obj = None
if self._native_sheet.row_initializer:
if self.adapter.row_initializer:
# allow initinalizer to return None
# if skipping is needed
obj = self._native_sheet.row_initializer(row)
obj = self.adapter.row_initializer(row)
if obj is None:
obj = self._native_sheet.table()
for name in self._native_sheet.column_names:
if self._native_sheet.column_name_mapping_dict is not None:
key = self._native_sheet.column_name_mapping_dict[name]
else:
key = name
setattr(obj, key, row[name])
self._native_book.session.add(obj)
obj = self.adapter.table()
for name in self.adapter.column_names:
setattr(obj, name, row[name])
self.importer.session.add(obj)
if self.__auto_commit and self.__bulk_size != float("inf"):
self.__count += 1
if self.__count % self.__bulk_size == 0:
self._native_book.session.commit()
self.importer.session.commit()
def close(self):
if self.__auto_commit:
self._native_book.session.commit()
self.importer.session.commit()
class SQLBookWriter(BookWriter):
""" write data into database tables via sqlalchemy """
class SQLBookWriter(IWriter):
"""write data into database tables via sqlalchemy"""
def __init__(self):
BookWriter.__init__(self)
self.__importer = None
self.__auto_commit = True
def open_content(self, file_content, auto_commit=True, **keywords):
def __init__(self, file_content, _, auto_commit=True, **keywords):
self.__importer = file_content
self.__auto_commit = auto_commit
@ -99,3 +98,6 @@ class SQLBookWriter(BookWriter):
)
return sheet_writer
def close(self):
pass

View File

@ -4,23 +4,27 @@
The lower level handler for querysets
:copyright: (c) 2014-2017 by Onni Software Ltd.
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
import datetime
from itertools import chain
from pyexcel_io.sheet import SheetReader
from pyexcel_io.constants import DEFAULT_SHEET_NAME
from pyexcel_io.plugin_api.abstract_sheet import ISheet
class QuerysetsReader(SheetReader):
""" turn querysets into an array """
class QuerysetsReader(ISheet):
"""turn querysets into an array"""
def __init__(self, query_sets, column_names, **keywords):
SheetReader.__init__(self, query_sets, **keywords)
def __init__(self, query_sets, column_names):
self.name = DEFAULT_SHEET_NAME
self.__column_names = column_names
self.__query_sets = query_sets
def row_iterator(self):
return chain([self.__column_names], self.__query_sets)
def to_array(self):
"""
Convert query sets into an array
@ -28,11 +32,11 @@ class QuerysetsReader(SheetReader):
if len(self.__query_sets) == 0:
yield []
for element in SheetReader.to_array(self):
yield element
def row_iterator(self):
return chain([self.__column_names], self.__query_sets)
for row in self.row_iterator():
row_values = []
for value in self.column_iterator(row):
row_values.append(value)
yield row_values
def column_iterator(self, row):
if self.__column_names is None:
@ -52,7 +56,7 @@ class QuerysetsReader(SheetReader):
def get_complex_attribute(row, attribute):
""" recursively get an attribute """
"""recursively get an attribute"""
attributes = attribute.split("__")
value = row
try:
@ -64,7 +68,7 @@ def get_complex_attribute(row, attribute):
def get_simple_attribute(row, attribute):
""" get dotted attribute """
"""get dotted attribute"""
value = getattr(row, attribute)
if isinstance(value, (datetime.date, datetime.time)):
value = value.isoformat()

View File

@ -4,7 +4,7 @@
all possible exceptions
:copyright: (c) 2014-2017 by Onni Software Ltd.
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
@ -21,12 +21,6 @@ class SupportingPluginAvailableButNotInstalled(Exception):
pass
class UpgradePlugin(Exception):
"""raised when a known plugin is not compatible"""
pass
class IntegerAccuracyLossError(Exception):
"""
When an interger is greater than 999999999999999, ods loses its accuracy.
@ -46,11 +40,13 @@ class IntegerAccuracyLossError(Exception):
b=get_sheet(file_name='abc.ods')
b[0,0] != s[0,0]
"""
def __init__(self, message):
custom_message = (
message + '\n' +
"In order to keep its accuracy, please save as string. Then " +
"convert to int, long or float after the value will be read back"
message
+ "\n"
+ "In order to keep its accuracy, please save as string. Then "
+ "convert to int, long or float after the value will be read back"
)
super(IntegerAccuracyLossError, self).__init__(custom_message)

View File

@ -4,20 +4,60 @@
The io interface to file extensions
:copyright: (c) 2014-2017 by Onni Software Ltd.
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
from types import GeneratorType
import os
import warnings
from types import GeneratorType
from pyexcel_io._compact import isstream, PY2
from pyexcel_io.plugins import READERS, WRITERS
import pyexcel_io.constants as constants
from pyexcel_io import constants
from pyexcel_io.reader import Reader
from pyexcel_io.writer import Writer
from pyexcel_io.plugins import OLD_READERS, OLD_WRITERS
from pyexcel_io._compact import isstream
from pyexcel_io.exceptions import (
NoSupportingPluginFound,
SupportingPluginAvailableButNotInstalled,
)
def iget_data(afile, file_type=None, **keywords):
    """Get data from an excel file source

    The data has not gone into memory yet. If you use dedicated partial read
    plugins, such as pyexcel-xlsxr, pyexcel-odsr, you will notice
    the memory consumption drop when you work with big files.

    :param afile: a file name, a file stream or actual content
    :param sheet_name: the name of the sheet to be loaded
    :param sheet_index: the index of the sheet to be loaded
    :param sheets: a list of sheet to be loaded
    :param file_type: used only when filename is not a physical file name
    :param force_file_type: used only when filename refers to a physical file
                            and it is intended to open it as forced file type.
    :param library: explicitly name a library for use.
                    e.g. library='pyexcel-ods'
    :param auto_detect_float: defaults to True
    :param auto_detect_int: defaults to True
    :param auto_detect_datetime: defaults to True
    :param ignore_infinity: defaults to True
    :param ignore_nan_text: various forms of 'NaN', 'nan' are ignored
    :param default_float_nan: choose one form of 'NaN', 'nan'
    :param pep_0515_off: turn off pep 0515. default to True.
    :param keep_trailing_empty_cells: keep trailing columns. default to False
    :param keywords: any other library specific parameters
    :returns: a ``(data, reader)`` pair exactly as produced by ``_get_data``
              when it is called with ``streaming=True``
    """
    # streaming is always forced on here; every other option is passed
    # through untouched.
    # NOTE(review): presumably the caller is expected to close the returned
    # reader once the data has been consumed -- confirm against _get_data.
    data, reader = _get_data(
        afile, file_type=file_type, streaming=True, **keywords
    )
    return data, reader
def get_data(afile, file_type=None, streaming=None, **keywords):
"""Get data from an excel file source
:param afile: a file name, a file stream or actual content
:param sheet_name: the name of the sheet to be loaded
:param sheet_index: the index of the sheet to be loaded
@ -37,31 +77,7 @@ def iget_data(afile, file_type=None, **keywords):
:param ignore_nan_text: various forms of 'NaN', 'nan' are ignored
:param default_float_nan: choose one form of 'NaN', 'nan'
:param pep_0515_off: turn off pep 0515. default to True.
:param keywords: any other library specific parameters
:returns: an ordered dictionary
"""
data, reader = _get_data(
afile, file_type=file_type, streaming=True, **keywords
)
return data, reader
def get_data(afile, file_type=None, streaming=None, **keywords):
"""Get data from an excel file source
:param afile: a file name, a file stream or actual content
:param sheet_name: the name of the sheet to be loaded
:param sheet_index: the index of the sheet to be loaded
:param file_type: used only when filename is not a physial file name
:param streaming: toggles the type of returned data. The values of the
returned dictionary remain as generator if it is set
to True. Default is False.
:param library: explicitly name a library for use.
e.g. library='pyexcel-ods'
:param auto_detect_float: defaults to True
:param auto_detect_int: defaults to True
:param auto_detect_datetime: defaults to True
:param ignore_infinity: defaults to True
:param keep_trailing_empty_cells: keep trailing columns. default to False
:param keywords: any other library specific parameters
:returns: an ordered dictionary
"""
@ -97,6 +113,8 @@ def save_data(afile, data, file_type=None, **keywords):
:param filename: actual file name, a file stream or actual content
:param data: a dictionary but an ordered dictionary is preferred
:param file_type: used only when filename is not a physial file name
:param force_file_type: used only when filename refers to a physical file
and it is intended to open it as forced file type.
:param library: explicitly name a library for use.
e.g. library='pyexcel-ods'
:param keywords: any other parameters that python csv module's
@ -109,39 +127,20 @@ def save_data(afile, data, file_type=None, **keywords):
single_sheet_in_book = True
to_store = {constants.DEFAULT_SHEET_NAME: data}
else:
if PY2:
keys = data.keys()
else:
keys = list(data.keys())
keys = list(data.keys())
single_sheet_in_book = len(keys) == 1
no_file_type = isstream(afile) and file_type is None
if no_file_type:
file_type = constants.FILE_FORMAT_CSV
store_data(
afile,
to_store,
file_type=file_type,
single_sheet_in_book=single_sheet_in_book,
**keywords
)
def store_data(afile, data, file_type=None, **keywords):
"""Non public function to store data to afile
:param filename: actual file name, a file stream or actual content
:param data: the data to be written
:param file_type: used only when filename is not a physial file name
:param keywords: any other parameters
"""
if isstream(afile):
keywords.update(dict(file_stream=afile, file_type=file_type))
else:
keywords.update(dict(file_name=afile, file_type=file_type))
keywords["single_sheet_in_book"] = single_sheet_in_book
with get_writer(**keywords) as writer:
writer.write(data)
writer.write(to_store)
def load_data(
@ -180,34 +179,61 @@ def load_data(
try:
file_type = file_name.split(".")[-1]
except AttributeError:
raise Exception("file_name should be a string type")
raise Exception(constants.MESSAGE_FILE_NAME_SHOULD_BE_STRING)
reader = READERS.get_a_plugin(file_type, library)
if file_name:
reader.open(file_name, **keywords)
elif file_content:
reader.open_content(file_content, **keywords)
elif file_stream:
reader.open_stream(file_stream, **keywords)
if sheet_name:
result = reader.read_sheet_by_name(sheet_name)
elif sheet_index is not None:
result = reader.read_sheet_by_index(sheet_index)
elif sheets is not None:
result = reader.read_many(sheets)
else:
result = reader.read_all()
if streaming is False:
for key in result.keys():
result[key] = list(result[key])
reader.close()
reader = None
try:
reader = OLD_READERS.get_a_plugin(file_type, library)
except (NoSupportingPluginFound, SupportingPluginAvailableButNotInstalled):
reader = Reader(file_type, library)
return result, reader
try:
if file_name:
reader.open(file_name, **keywords)
elif file_content:
reader.open_content(file_content, **keywords)
elif file_stream:
reader.open_stream(file_stream, **keywords)
else:
raise IOError("Unrecognized options")
if sheet_name:
result = reader.read_sheet_by_name(sheet_name)
elif sheet_index is not None:
result = reader.read_sheet_by_index(sheet_index)
elif sheets is not None:
result = reader.read_many(sheets)
else:
result = reader.read_all()
if streaming is False:
for key in result.keys():
result[key] = list(result[key])
reader.close()
reader = None
return result, reader
except NoSupportingPluginFound:
if file_name:
if os.path.exists(file_name):
if os.path.isfile(file_name):
raise
else:
raise IOError(
constants.MESSAGE_NOT_FILE_FORMATTER % file_name
)
else:
raise IOError(
constants.MESSAGE_FILE_DOES_NOT_EXIST % file_name
)
else:
raise
def get_writer(
file_name=None, file_stream=None, file_type=None, library=None, **keywords
file_name=None,
file_stream=None,
file_type=None,
library=None,
force_file_type=None,
**keywords
):
"""find a suitable writer"""
inputs = [file_name, file_stream]
@ -217,15 +243,23 @@ def get_writer(
raise IOError(constants.MESSAGE_ERROR_02)
file_type_given = True
if file_type is None and file_name:
try:
file_type = file_name.split(".")[-1]
except AttributeError:
raise Exception("file_name should be a string type")
if force_file_type:
file_type = force_file_type
else:
try:
file_type = file_name.split(".")[-1]
except AttributeError:
raise Exception(constants.MESSAGE_FILE_NAME_SHOULD_BE_STRING)
file_type_given = False
writer = WRITERS.get_a_plugin(file_type, library)
try:
writer = OLD_WRITERS.get_a_plugin(file_type, library)
except (NoSupportingPluginFound, SupportingPluginAvailableButNotInstalled):
writer = Writer(file_type, library)
if file_name:
if file_type_given:
writer.open_content(file_name, **keywords)
@ -235,3 +269,7 @@ def get_writer(
writer.open_stream(file_stream, **keywords)
# else: is resolved by earlier raise statement
return writer
# backward compatibility
store_data = save_data

View File

@ -4,11 +4,10 @@
Control file streams
:copyright: (c) 2014-2017 by Onni Software Ltd.
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
from pyexcel_io._compact import StringIO, BytesIO
from pyexcel_io._compact import BytesIO, StringIO
MIME_TYPES = {}
FILE_TYPES = ()

View File

@ -0,0 +1,3 @@
from .abstract_sheet import ISheet, ISheetWriter, NamedContent # noqa: F401
from .abstract_reader import IReader # noqa: F401
from .abstract_writer import IWriter # noqa: F401

View File

@ -0,0 +1,21 @@
from .abstract_sheet import ISheet
class IReader(object):
    """
    Interface of a book-level reader plugin.

    Implementations are expected to populate ``self.content_array`` with a
    list of NamedContent objects, where ``name`` is the sheet name and
    ``payload`` is the native sheet.
    """

    def read_sheet(self, sheet_index) -> ISheet:
        """Return the sheet found at ``sheet_index``; must be overridden."""
        raise NotImplementedError("Read the sheet by index")

    def sheet_names(self):
        """List the ``name`` of every entry of ``content_array``, in order."""
        names = []
        for entry in self.content_array:
            names.append(entry.name)
        return names

    def __len__(self):
        """Number of entries in ``content_array``."""
        sheet_count = len(self.content_array)
        return sheet_count

    def close(self):
        """Release whatever the reader holds; must be overridden."""
        raise NotImplementedError("Close the file")

View File

@ -0,0 +1,31 @@
class ISheet(object):
    """Interface of a readable sheet: rows first, then the cells of a row."""

    def row_iterator(self):
        """Yield the rows of the sheet; must be overridden."""
        reason = "iterate each row"
        raise NotImplementedError(reason)

    def column_iterator(self, row):
        """Yield the cells of one ``row``; must be overridden."""
        reason = "iterate each column at a given row"
        raise NotImplementedError(reason)
class ISheetWriter(object):
    """Interface of a writable sheet."""

    def write_row(self, data_row):
        """Persist a single row; must be overridden."""
        reason = "How does your sheet write a row of data"
        raise NotImplementedError(reason)

    def write_array(self, table):
        """
        For standalone usage: feed every row of ``table`` to write_row,
        in iteration order
        """
        pending_rows = iter(table)
        for data_row in pending_rows:
            self.write_row(data_row)

    def close(self):
        """Finish writing; must be overridden."""
        reason = "How would you close your file"
        raise NotImplementedError(reason)
class NamedContent(object):
    """
    Pairs a payload with a name, for content that carries no name itself
    """

    def __init__(self, name, payload):
        # plain value holder: both arguments are stored as given
        self.name, self.payload = name, payload

View File

@ -0,0 +1,15 @@
from .abstract_sheet import ISheetWriter
class IWriter(object):
    """Interface of a book-level writer plugin."""

    def create_sheet(self, sheet_name) -> ISheetWriter:
        """Return a sheet writer for ``sheet_name``; must be overridden."""
        raise NotImplementedError("Please implement a native sheet writer")

    def write(self, incoming_dict):
        """
        Write every sheet of ``incoming_dict`` (sheet name -> rows).

        Each sheet gets its own writer from create_sheet, which is closed
        as soon as its rows have been written. A falsy writer aborts the
        whole operation with an Exception.
        """
        for sheet_name in incoming_dict:
            sheet_writer = self.create_sheet(sheet_name)
            if not sheet_writer:
                raise Exception("Cannot create a sheet writer!")
            rows = incoming_dict[sheet_name]
            sheet_writer.write_array(rows)
            sheet_writer.close()

View File

@ -4,24 +4,23 @@
factory for getting readers and writers
:copyright: (c) 2014-2017 by Onni Software Ltd.
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
from lml.loader import scan_plugins_regex
from lml.plugin import PluginManager
from lml.plugin import PluginInfoChain, PluginInfo
import pyexcel_io.utils as ioutils
import pyexcel_io.manager as manager
import pyexcel_io.exceptions as exceptions
import pyexcel_io.constants as constants
import pyexcel_io.exceptions as exceptions
from lml.loader import scan_plugins_regex
from lml.plugin import PluginInfo, PluginManager, PluginInfoChain
ERROR_MESSAGE_FORMATTER = "one of these plugins for %s data in '%s': %s"
UPGRADE_MESSAGE = "Please upgrade the plugin '%s' according to \
plugin compactibility table."
READER_PLUGIN = "pyexcel-io reader"
READER_PLUGIN_V2 = "pyexcel-io v2 reader"
WRITER_PLUGIN = "pyexcel-io writer"
WRITER_PLUGIN_V2 = "pyexcel-io v2 writer"
class IOPluginInfo(PluginInfo):
@ -33,7 +32,7 @@ class IOPluginInfo(PluginInfo):
class IOPluginInfoChain(PluginInfoChain):
"""provide custom functions to add a reader and a writer """
"""provide custom functions to add a reader and a writer"""
def add_a_reader(
self,
@ -41,7 +40,7 @@ class IOPluginInfoChain(PluginInfoChain):
file_types=None,
stream_type=None,
):
""" add pyexcle-io reader plugin info """
"""add pyexcle-io reader plugin info"""
a_plugin_info = IOPluginInfo(
READER_PLUGIN,
self._get_abs_path(relative_plugin_class_path),
@ -56,7 +55,7 @@ class IOPluginInfoChain(PluginInfoChain):
file_types=None,
stream_type=None,
):
""" add pyexcle-io writer plugin info """
"""add pyexcle-io writer plugin info"""
a_plugin_info = IOPluginInfo(
WRITER_PLUGIN,
self._get_abs_path(relative_plugin_class_path),
@ -66,6 +65,50 @@ class IOPluginInfoChain(PluginInfoChain):
return self.add_a_plugin_instance(a_plugin_info)
class IOPluginInfoChainV2(PluginInfoChain):
    """provide custom functions to add a v2 reader and a v2 writer

    V2 plugins are registered under tags of the form
    ``<location>-<file_type>``, one tag for each combination of the given
    locations and file types.
    """

    @staticmethod
    def _build_tags(locations, file_types):
        """Combine locations and file types into ``location-file_type`` tags.

        ``None`` is accepted for either argument and treated as "nothing",
        so a call that omits them registers no tag instead of failing with
        a TypeError.
        """
        return [
            f"{location}-{file_type}"
            for file_type in (file_types or ())
            for location in (locations or ())
        ]

    def add_a_reader(
        self,
        relative_plugin_class_path=None,
        locations=(),
        file_types=None,
        stream_type=None,
    ):
        """add pyexcel-io reader plugin info

        :param relative_plugin_class_path: class path relative to this chain
        :param locations: iterable of location names used as tag prefixes
        :param file_types: iterable of file types used as tag suffixes
        :param stream_type: passed through to the plugin info
        :returns: whatever ``add_a_plugin_instance`` returns
        """
        a_plugin_info = IOPluginInfo(
            READER_PLUGIN_V2,
            self._get_abs_path(relative_plugin_class_path),
            file_types=self._build_tags(locations, file_types),
            stream_type=stream_type,
        )
        return self.add_a_plugin_instance(a_plugin_info)

    def add_a_writer(
        self,
        relative_plugin_class_path=None,
        locations=(),
        file_types=(),
        stream_type=None,
    ):
        """add pyexcel-io writer plugin info

        :param relative_plugin_class_path: class path relative to this chain
        :param locations: iterable of location names used as tag prefixes
        :param file_types: iterable of file types used as tag suffixes
        :param stream_type: passed through to the plugin info
        :returns: whatever ``add_a_plugin_instance`` returns
        """
        a_plugin_info = IOPluginInfo(
            WRITER_PLUGIN_V2,
            self._get_abs_path(relative_plugin_class_path),
            file_types=self._build_tags(locations, file_types),
            stream_type=stream_type,
        )
        return self.add_a_plugin_instance(a_plugin_info)
class IOManager(PluginManager):
"""Manage pyexcel-io plugins"""
@ -81,13 +124,16 @@ class IOManager(PluginManager):
_do_additional_registration(plugin_info)
def register_a_plugin(self, cls, plugin_info):
""" for dynamically loaded plugin """
"""for dynamically loaded plugin"""
PluginManager.register_a_plugin(self, cls, plugin_info)
_do_additional_registration(plugin_info)
def get_a_plugin(self, file_type=None, library=None, **keywords):
__file_type = file_type.lower()
plugin = self.load_me_now(__file_type, library=library)
try:
plugin = self.load_me_now(__file_type, library=library)
except Exception:
self.raise_exception(__file_type)
handler = plugin()
handler.set_type(__file_type)
return handler
@ -112,7 +158,7 @@ class IOManager(PluginManager):
)
def get_all_formats(self):
""" return all supported formats """
"""return all supported formats"""
all_formats = set(
list(self.registry.keys()) + list(self.known_plugins.keys())
)
@ -122,14 +168,88 @@ class IOManager(PluginManager):
return all_formats
class NewIOManager(IOManager):
    """Plugin manager for v2 plugins, keyed by ``<location>-<file_type>``."""

    def load_me_later(self, plugin_info):
        """Register a lazily loaded plugin plus its stream/file types."""
        PluginManager.load_me_later(self, plugin_info)
        _do_additional_registration_for_new_plugins(plugin_info)

    def register_a_plugin(self, cls, plugin_info):
        """for dynamically loaded plugin"""
        PluginManager.register_a_plugin(self, cls, plugin_info)
        _do_additional_registration_for_new_plugins(plugin_info)

    def get_a_plugin(
        self, file_type=None, location=None, library=None, **keywords
    ):
        """Load what is registered for ``location`` and ``file_type``.

        The file type is lower-cased before the lookup; the loaded plugin
        is returned as is, without being instantiated.
        """
        lowered_type = file_type.lower()
        tag = "{}-{}".format(location, lowered_type)
        return self.load_me_now(tag, library=library)

    def raise_exception(self, file_type):
        """Raise the error that fits a failed lookup of tag ``file_type``.

        Only the part after the first dash is used to look up installable
        plugins and to build the message.
        """
        file_type = file_type.split("-")[1]
        plugins = self.known_plugins.get(file_type, None)
        if not plugins:
            raise exceptions.NoSupportingPluginFound(
                "No suitable library found for %s" % file_type
            )
        if len(plugins) > 1:
            suggestion = ERROR_MESSAGE_FORMATTER % (
                self.action,
                file_type,
                ",".join(plugins),
            )
        else:
            suggestion = plugins[0]
        raise exceptions.SupportingPluginAvailableButNotInstalled(
            "Please install " + suggestion
        )

    def get_all_formats(self):
        """return all supported formats"""
        # registry keys are tags; keep the part after the first dash
        all_formats = {tag.split("-")[1] for tag in self.registry.keys()}
        all_formats.update(self.known_plugins.keys())
        return all_formats
def _do_additional_registration(plugin_info):
for file_type in plugin_info.tags():
manager.register_stream_type(file_type, plugin_info.stream_type)
manager.register_a_file_type(file_type, plugin_info.stream_type, None)
READERS = IOManager(READER_PLUGIN, ioutils.AVAILABLE_READERS)
WRITERS = IOManager(WRITER_PLUGIN, ioutils.AVAILABLE_WRITERS)
def _do_additional_registration_for_new_plugins(plugin_info):
    """Register stream type and file type for every tag of a v2 plugin.

    The part of each tag after the first dash is what gets registered,
    together with the stream type of the plugin.
    """
    stream_type = plugin_info.stream_type
    for tag in plugin_info.tags():
        file_type = tag.split("-")[1]
        manager.register_stream_type(file_type, stream_type)
        manager.register_a_file_type(file_type, stream_type, None)
class AllReaders:
    """Combined view over the old and the new reader registries."""

    def get_all_formats(self):
        """Formats known to either registry, minus the two database ones."""
        legacy_formats = OLD_READERS.get_all_formats()
        combined = legacy_formats.union(NEW_READERS.get_all_formats())
        database_formats = set([constants.DB_SQL, constants.DB_DJANGO])
        return combined - database_formats
class AllWriters:
    """Combined view over the old and the new writer registries."""

    def get_all_formats(self):
        """Formats known to either registry, minus the two database ones."""
        legacy_formats = OLD_WRITERS.get_all_formats()
        combined = legacy_formats.union(NEW_WRITERS.get_all_formats())
        database_formats = set([constants.DB_SQL, constants.DB_DJANGO])
        return combined - database_formats
OLD_READERS = IOManager(READER_PLUGIN, ioutils.AVAILABLE_READERS)
OLD_WRITERS = IOManager(WRITER_PLUGIN, ioutils.AVAILABLE_WRITERS)
NEW_WRITERS = NewIOManager(WRITER_PLUGIN_V2, ioutils.AVAILABLE_WRITERS)
NEW_READERS = NewIOManager(READER_PLUGIN_V2, ioutils.AVAILABLE_READERS)
READERS = AllReaders()
WRITERS = AllWriters()
def load_plugins(plugin_name_patterns, path, black_list, white_list):
@ -138,5 +258,5 @@ def load_plugins(plugin_name_patterns, path, black_list, white_list):
plugin_name_patterns=plugin_name_patterns,
pyinstaller_path=path,
black_list=black_list,
white_list=white_list
white_list=white_list,
)

126
pyexcel_io/reader.py Normal file
View File

@ -0,0 +1,126 @@
from pyexcel_io.sheet import SheetReader
from pyexcel_io.plugins import NEW_READERS
from pyexcel_io._compact import OrderedDict
def clean_keywords(keywords):
    """Split ``keywords`` into sheet-level and native-sheet options.

    :param keywords: dictionary of keyword arguments
    :returns: ``(sheet_keywords, native_sheet_keywords)``; the first holds
              the entries whose key is one of the known sheet options, the
              second holds everything else
    """
    sheet_options = (
        "start_row",
        "row_limit",
        "start_column",
        "column_limit",
        "skip_column_func",
        "skip_row_func",
        "skip_empty_rows",
        "row_renderer",
        "keep_trailing_empty_cells",
    )
    sheet_keywords = {
        key: value for key, value in keywords.items() if key in sheet_options
    }
    native_sheet_keywords = {
        key: value
        for key, value in keywords.items()
        if key not in sheet_options
    }
    return sheet_keywords, native_sheet_keywords
class Reader(object):
    """Front end that finds a v2 reader plugin and reads sheets through it.

    One of ``open``, ``open_content`` or ``open_stream`` has to be called
    before any of the ``read_*`` methods. ``close`` may be called at any
    time, including before a successful open.
    """

    def __init__(self, file_type, library=None):
        self.file_type = file_type
        self.library = library
        self.keywords = None
        # the native plugin instance; stays None until an open*() call
        # succeeds so that close()/__exit__ never trip over a missing
        # attribute and hide the error that made the open fail
        self.reader = None
        # if you know which reader class to use, this attribute allows
        # you to set the reader class externally and skip the plugin
        # lookup. In pyexcel, this attribute is mainly used for testing
        self.reader_class = None

    def _open(self, source, location, keywords):
        """Shared implementation of the three public open methods.

        Splits the keywords, resolves the plugin class for ``location``
        unless one was set already, and instantiates it with the source,
        the file type and the native-sheet keywords.
        """
        self.keywords, native_sheet_keywords = clean_keywords(keywords)
        if self.reader_class is None:
            self.reader_class = NEW_READERS.get_a_plugin(
                self.file_type, location=location, library=self.library
            )
        self.reader = self.reader_class(
            source, self.file_type, **native_sheet_keywords
        )
        return self.reader

    def open(self, file_name, **keywords):
        """Open a file by name; returns the native reader instance."""
        return self._open(file_name, "file", keywords)

    def open_content(self, file_content, **keywords):
        """Open in-memory content; returns the native reader instance."""
        return self._open(file_content, "content", keywords)

    def open_stream(self, file_stream, **keywords):
        """Open a file stream; returns the native reader instance."""
        return self._open(file_stream, "memory", keywords)

    def read_sheet_by_name(self, sheet_name):
        """
        read a named sheet from a excel data book

        :raises ValueError: when no sheet has that name
        """
        sheet_names = self.reader.sheet_names()
        index = sheet_names.index(sheet_name)
        return self.read_sheet_by_index(index)

    def read_sheet_by_index(self, sheet_index):
        """
        read the sheet at ``sheet_index``

        :returns: a one-entry dictionary mapping the sheet name to the
                  result of ``to_array()`` of the wrapped sheet
        """
        sheet_reader = self.reader.read_sheet(sheet_index)
        sheet_names = self.reader.sheet_names()
        sheet = EncapsulatedSheetReader(sheet_reader, **self.keywords)
        return {sheet_names[sheet_index]: sheet.to_array()}

    def read_all(self):
        """
        read everything from a excel data book
        """
        result = OrderedDict()
        for sheet_index in range(len(self.reader)):
            content_dict = self.read_sheet_by_index(sheet_index)
            result.update(content_dict)
        return result

    def read_many(self, sheets):
        """
        read the selected sheets from a excel data book

        :param sheets: iterable of selectors; an int is taken as a sheet
                       index, anything else as a sheet name
        """
        result = OrderedDict()
        for sheet in sheets:
            if isinstance(sheet, int):
                result.update(self.read_sheet_by_index(sheet))
            else:
                result.update(self.read_sheet_by_name(sheet))
        return result

    def close(self):
        """Close the native reader, if there is one.

        :returns: what the native reader's ``close`` returns, or None when
                  nothing has been opened
        """
        if self.reader is None:
            return None
        return self.reader.close()

    def __enter__(self):
        return self

    def __exit__(self, a_type, value, traceback):
        self.close()
class EncapsulatedSheetReader(SheetReader):
    """SheetReader that forwards iteration to the wrapped native sheet."""

    def row_iterator(self):
        """Yield the rows produced by the native sheet's row_iterator."""
        for native_row in self._native_sheet.row_iterator():
            yield native_row

    def column_iterator(self, row):
        """Yield the cells the native sheet produces for ``row``."""
        for cell in self._native_sheet.column_iterator(row):
            yield cell

View File

@ -4,26 +4,34 @@
file readers
:copyright: (c) 2014-2017 by Onni Software Ltd.
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
from pyexcel_io.plugins import IOPluginInfoChain
from pyexcel_io.plugins import IOPluginInfoChainV2
IOPluginInfoChain(__name__).add_a_reader(
relative_plugin_class_path="csvr.CSVBookReader",
file_types=["csv"],
IOPluginInfoChainV2(__name__).add_a_reader(
relative_plugin_class_path="csv_in_file.FileReader",
locations=["file"],
file_types=["csv", "tsv"],
stream_type="text",
).add_a_reader(
relative_plugin_class_path="tsv.TSVBookReader",
file_types=["tsv"],
relative_plugin_class_path="csv_content.ContentReader",
locations=["content"],
file_types=["csv", "tsv"],
stream_type="text",
).add_a_reader(
relative_plugin_class_path="csvz.CSVZipBookReader",
file_types=["csvz"],
relative_plugin_class_path="csv_in_memory.MemoryReader",
locations=["memory"],
file_types=["csv", "tsv"],
stream_type="text",
).add_a_reader(
relative_plugin_class_path="csvz.FileReader",
file_types=["csvz", "tsvz"],
locations=["file", "memory"],
stream_type="binary",
).add_a_reader(
relative_plugin_class_path="tsvz.TSVZipBookReader",
file_types=["tsvz"],
relative_plugin_class_path="csvz.ContentReader",
file_types=["csvz", "tsvz"],
locations=["content"],
stream_type="binary",
)

View File

@ -0,0 +1,27 @@
import mmap
from pyexcel_io.book import _convert_content_to_stream
from pyexcel_io.readers.csv_sheet import CSVMemoryMapIterator
from pyexcel_io.readers.csv_in_memory import MemoryReader
class ContentReader(MemoryReader):
    """MemoryReader fed from raw content instead of a ready-made stream."""

    def __init__(self, file_content, file_type, **keywords):
        stream = ContentReader.convert_content_to_stream(
            file_content, file_type, **keywords
        )
        super().__init__(stream, file_type, **keywords)

    @staticmethod
    def convert_content_to_stream(file_content, file_type, **keywords):
        """Turn ``file_content`` into something MemoryReader can consume.

        An mmap object is wrapped in a CSVMemoryMapIterator; bytes are
        decoded first; the resulting content is handed to
        ``_convert_content_to_stream``. The ``encoding`` keyword defaults
        to "utf-8".
        """
        encoding = keywords.get("encoding", "utf-8")
        if isinstance(file_content, mmap.mmap):
            # load from mmap
            return CSVMemoryMapIterator(file_content, encoding)
        content = file_content
        if isinstance(content, bytes):
            content = content.decode(encoding)
        return _convert_content_to_stream(content, file_type)

View File

@ -0,0 +1,64 @@
import os
import re
import glob
from pyexcel_io import constants
from pyexcel_io.sheet import NamedContent
from pyexcel_io.plugin_api import IReader
from pyexcel_io.readers.csv_sheet import CSVFileReader
DEFAULT_NEWLINE = "\r\n"
class FileReader(IReader):
    """Reader for a csv/tsv file or a family of per-sheet csv files.

    A family consists of files named
    ``<stem><sep><sheet name><sep><index><extension>`` next to the given
    file name, where ``<sep>`` is ``constants.DEFAULT_MULTI_CSV_SEPARATOR``.
    When no such file exists, the given file itself is the only sheet.
    """

    def __init__(self, file_name, file_type, **keywords):
        """Load content from a file

        :params str file_name: an accessible file path
        :params str file_type: the tsv file type switches the csv dialect
        :params keywords: options kept for the sheet readers
        """
        self.handles = []
        self.keywords = keywords
        if file_type == constants.FILE_FORMAT_TSV:
            self.keywords["dialect"] = constants.KEYWORD_TSV_DIALECT
        self.__line_terminator = keywords.get(
            constants.KEYWORD_LINE_TERMINATOR, DEFAULT_NEWLINE
        )
        self.content_array = self._find_sheets(file_name)

    @staticmethod
    def _find_sheets(file_name):
        """Build the list of NamedContent entries for ``file_name``."""
        stem, extension = os.path.splitext(file_name)
        separator = constants.DEFAULT_MULTI_CSV_SEPARATOR
        # the file name is literal text: escape it so that characters such
        # as "[" or "*" in a path are not read as glob wildcards
        file_pattern = "%s%s*%s*%s" % (
            glob.escape(stem),
            glob.escape(separator),
            glob.escape(separator),
            glob.escape(extension),
        )
        file_list = glob.glob(file_pattern)
        if len(file_list) == 0:
            file_parts = os.path.split(file_name)
            return [NamedContent(file_parts[-1], file_name)]

        # same reasoning for the regular expression: without escaping, a
        # Windows path ("\d", "\b") or a name with "(" or "+" either fails
        # to compile or fails to match
        matcher = re.compile(
            "%s%s(.*)%s(.*)%s"
            % (
                re.escape(stem),
                re.escape(separator),
                re.escape(separator),
                re.escape(extension),
            )
        )
        found = []
        for path in file_list:
            result = matcher.match(path)
            if result is None:
                # not a member of the family after all
                continue
            found.append((result.group(1), result.group(2), path))
        found.sort(key=FileReader._sheet_order)
        return [NamedContent(sheet_name, path) for sheet_name, _, path in found]

    @staticmethod
    def _sheet_order(entry):
        """Sort key: numeric indices by value, anything else afterwards.

        Comparing the indices as text would put sheet "10" before "2".
        """
        index = entry[1]
        if index.isdecimal():
            return (0, int(index), "")
        return (1, 0, index)

    def read_sheet(self, index):
        """Create a CSVFileReader for the sheet at ``index`` and track it."""
        reader = CSVFileReader(self.content_array[index], **self.keywords)
        self.handles.append(reader)
        return reader

    def close(self):
        """Close every sheet reader handed out so far."""
        for reader in self.handles:
            reader.close()
        self.handles = []

View File

@ -0,0 +1,62 @@
import re
import pyexcel_io._compact as compact
from pyexcel_io import constants
from pyexcel_io.sheet import NamedContent
from pyexcel_io.plugin_api import IReader
from pyexcel_io.readers.csv_sheet import CSVinMemoryReader
DEFAULT_SHEET_SEPARATOR_FORMATTER = f"---{constants.DEFAULT_NAME}---%s"
class MemoryReader(IReader):
    """Reader for csv/tsv data held in a stream."""

    def __init__(
        self, file_stream, file_type, multiple_sheets=False, **keywords
    ):
        """Load content from memory

        :params stream file_stream: the actual file content in memory
        :params str file_type: the tsv file type switches the csv dialect
        :params bool multiple_sheets: when true, the whole stream is read
            and split into sheets on the sheet separator line
        """
        self.handles = []
        self.keywords = keywords
        if file_type == constants.FILE_FORMAT_TSV:
            self.keywords["dialect"] = constants.KEYWORD_TSV_DIALECT
        self.file_type = file_type
        self.__load_from_memory_flag = True
        self.__line_terminator = keywords.get(
            constants.KEYWORD_LINE_TERMINATOR, constants.DEFAULT_CSV_NEWLINE
        )
        separator = DEFAULT_SHEET_SEPARATOR_FORMATTER % self.__line_terminator

        if not multiple_sheets:
            # single sheet: rewind when possible and use the stream as is
            if hasattr(file_stream, "seek"):
                file_stream.seek(0)
            self.content_array = [NamedContent(self.file_type, file_stream)]
            return

        # will be slow for large files
        file_stream.seek(0)
        whole_text = file_stream.read()
        # skip empty named sheet
        chunks = [
            chunk for chunk in whole_text.split(separator) if chunk != ""
        ]
        named_contents = []
        for chunk in chunks:
            header, *body = chunk.split(self.__line_terminator)
            # the first line of a chunk carries the sheet name
            name_match = re.match(constants.SEPARATOR_MATCHER, header)
            sheet_stream = compact.StringIO("\n".join(body))
            named_contents.append(
                NamedContent(name_match.group(1), sheet_stream)
            )
        self.content_array = named_contents

    def read_sheet(self, index):
        """Create a CSVinMemoryReader for the sheet at ``index``."""
        sheet_reader = CSVinMemoryReader(
            self.content_array[index], **self.keywords
        )
        self.handles.append(sheet_reader)
        return sheet_reader

    def close(self):
        """Close every sheet reader handed out so far."""
        for sheet_reader in self.handles:
            sheet_reader.close()

View File

@ -0,0 +1,192 @@
"""
pyexcel_io.readers.csv_sheet
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
csv file reader
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
import csv
import pyexcel_io.service as service
import pyexcel_io._compact as compact
import pyexcel_io.constants as constants
from pyexcel_io.plugin_api import ISheet
DEFAULT_SEPARATOR = "__"
DEFAULT_SHEET_SEPARATOR_FORMATTER = "---%s---" % constants.DEFAULT_NAME + "%s"
SEPARATOR_MATCHER = "---%s:(.*)---" % constants.DEFAULT_NAME
DEFAULT_CSV_STREAM_FILE_FORMATTER = (
"---%s:" % constants.DEFAULT_NAME + "%s---%s"
)
DEFAULT_NEWLINE = "\r\n"
BOM_LITTLE_ENDIAN = b"\xff\xfe"
# the UTF-16 big-endian byte order mark is FE FF
BOM_BIG_ENDIAN = b"\xfe\xff"
LITTLE_ENDIAN = 0
BIG_ENDIAN = 1


class CSVMemoryMapIterator(object):
    """
    Wrapper class for mmap object

    mmap object does not handle encoding at all. This class
    provide the necessary transcoding for utf-8, utf-16 and utf-32

    Lines are obtained with ``readline()`` of the wrapped object, which
    splits on the newline byte. In the little-endian multi-byte encodings
    the zero bytes that complete the newline character therefore show up at
    the start of the following line and are dropped there.
    """

    # encoding -> (zero bytes left over at the start of the next line,
    #              assumed byte order)
    _LAYOUTS = {
        "utf-8": (0, LITTLE_ENDIAN),
        # ..\r\x00\n
        # \x00\x..
        "utf-16": (1, LITTLE_ENDIAN),
        # \r\x00\x00\x00\n
        # \x00\x00\x00\x..
        "utf-32": (3, LITTLE_ENDIAN),
        "utf-16-be": (0, BIG_ENDIAN),
        "utf-32-be": (0, BIG_ENDIAN),
        "utf-16-le": (1, LITTLE_ENDIAN),
        "utf-32-le": (3, LITTLE_ENDIAN),
    }

    def __init__(self, mmap_obj, encoding):
        """
        :param mmap_obj: object whose ``readline()`` returns bytes
        :param encoding: one of the keys of ``_LAYOUTS``
        :raises Exception: for any other encoding name
        """
        if encoding not in self._LAYOUTS:
            raise Exception("Encoding %s is not supported" % encoding)
        self.__mmap_obj = mmap_obj
        self.__encoding = encoding
        self.__count = 0
        self.__zeros_left_in_2_row, self.__endian = self._LAYOUTS[encoding]

    def __iter__(self):
        return self

    def __has_big_endian_bom(self, raw):
        """Tell whether the first line starts with a big-endian BOM."""
        if self.__encoding == "utf-16":
            return raw[:2] == BOM_BIG_ENDIAN
        if self.__encoding == "utf-32":
            # the UTF-32 BOM is four bytes long: 00 00 FE FF
            return raw[:4] == b"\x00\x00" + BOM_BIG_ENDIAN
        return False

    def __next__(self):
        raw = self.__mmap_obj.readline()
        codec = self.__encoding
        if self.__count == 0:
            if codec in ("utf-16", "utf-32"):
                if self.__has_big_endian_bom(raw):
                    self.__endian = BIG_ENDIAN
                # The generic codec is right for the first line only, where
                # it consumes the byte order mark. The following lines have
                # no mark, so pin the byte order explicitly for them.
                if self.__endian == BIG_ENDIAN:
                    self.__encoding = codec + "-be"
                else:
                    self.__encoding = codec + "-le"
        elif self.__endian == LITTLE_ENDIAN:
            # drop the zero bytes that belong to the previous newline
            raw = raw[self.__zeros_left_in_2_row :]  # noqa: E203
        self.__count += 1
        if not raw:
            # nothing left to read; a blank line still holds its newline
            # byte at this point and is passed on as an empty string
            raise StopIteration
        if self.__endian == LITTLE_ENDIAN:
            raw = raw.rstrip()
        return raw.decode(codec)

    def close(self):
        pass
class CSVSheetReader(ISheet):
    """csv sheet reader base class; subclasses provide the text source"""

    def __init__(
        self,
        sheet,
        encoding="utf-8",
        auto_detect_float=True,
        ignore_infinity=True,
        auto_detect_int=True,
        auto_detect_datetime=True,
        pep_0515_off=True,
        ignore_nan_text=False,
        default_float_nan=None,
        **keywords
    ):
        self._native_sheet = sheet
        self._encoding = encoding
        # whatever is left over is handed to csv.reader in row_iterator()
        self._keywords = keywords
        # opened lazily by row_iterator()
        self.__file_handle = None
        # switches for the cell conversion done in __convert_cell()
        self.__auto_detect_int = auto_detect_int
        self.__auto_detect_float = auto_detect_float
        self.__auto_detect_datetime = auto_detect_datetime
        self.__ignore_infinity = ignore_infinity
        self.__pep_0515_off = pep_0515_off
        self.__ignore_nan_text = ignore_nan_text
        self.__default_float_nan = default_float_nan

    def get_file_handle(self):
        """return a unicode text source for csv.reader (subclass hook)"""
        raise NotImplementedError("Please implement get_file_handle()")

    def row_iterator(self):
        """obtain the file handle and wrap it in a csv.reader"""
        handle = self.get_file_handle()
        self.__file_handle = handle
        return csv.reader(handle, **self._keywords)

    def column_iterator(self, row):
        """yield the cells of a row; None and "" are passed through as is"""
        for cell in row:
            if cell is not None and cell != "":
                yield self.__convert_cell(cell)
            else:
                yield cell

    def __convert_cell(self, csv_cell_text):
        """try int, then float, then date; fall back to the original text"""
        value = None
        if self.__auto_detect_int:
            value = service.detect_int_value(
                csv_cell_text, self.__pep_0515_off
            )
        if value is None and self.__auto_detect_float:
            value = service.detect_float_value(
                csv_cell_text,
                self.__pep_0515_off,
                ignore_nan_text=self.__ignore_nan_text,
                default_float_nan=self.__default_float_nan,
            )
            is_infinite = value in [float("inf"), float("-inf")]
            if is_infinite and self.__ignore_infinity:
                # discard the float so that later detection steps can run
                value = None
        if value is None and self.__auto_detect_datetime:
            value = service.detect_date_value(csv_cell_text)
        return csv_cell_text if value is None else value

    def close(self):
        """close the file handle if row_iterator() ever obtained one"""
        handle = self.__file_handle
        if handle:
            handle.close()
        # otherwise row_iterator() was never called: nothing was opened
class CSVFileReader(CSVSheetReader):
    """read csv from a physical file"""

    def get_file_handle(self):
        """open the path kept in the sheet payload as text"""
        return open(self._native_sheet.payload, "r", encoding=self._encoding)
class CSVinMemoryReader(CSVSheetReader):
    """read csv content that lives in memory"""

    def get_file_handle(self):
        """return the payload itself, or a decoded StringIO for BytesIO"""
        payload = self._native_sheet.payload
        if not isinstance(payload, compact.BytesIO):
            return payload
        # A BytesIO payload is read in full and decoded into a StringIO.
        # That conversion costs an extra copy of the whole content.
        decoded = payload.read().decode(self._encoding)
        return compact.StringIO(decoded)

View File

@ -1,369 +0,0 @@
"""
pyexcel_io.readers.csvr
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
csv file reader
:copyright: (c) 2014-2017 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
import re
import os
import csv
import glob
import codecs
from pyexcel_io.book import BookReader
from pyexcel_io.sheet import SheetReader, NamedContent
import pyexcel_io._compact as compact
import pyexcel_io.constants as constants
import pyexcel_io.service as service
# NOTE(review): the purpose of DEFAULT_SEPARATOR is not evident from this
# module - confirm where it is consumed.
DEFAULT_SEPARATOR = "__"
# Evaluates to "---<DEFAULT_NAME>---%s"; CSVBookReader._load_from_stream
# fills the placeholder with the line terminator and splits the stream
# content on the result.
DEFAULT_SHEET_SEPARATOR_FORMATTER = "---%s---" % constants.DEFAULT_NAME + "%s"
# Regular expression whose group 1 captures the text found between
# "---<DEFAULT_NAME>:" and the closing "---".
SEPARATOR_MATCHER = "---%s:(.*)---" % constants.DEFAULT_NAME
# Evaluates to "---<DEFAULT_NAME>:%s---%s": two placeholders are left over
# for the caller to fill in.
DEFAULT_CSV_STREAM_FILE_FORMATTER = (
    "---%s:" % constants.DEFAULT_NAME + "%s---%s"
)
DEFAULT_NEWLINE = "\r\n"
# Two-byte byte order marks that can open a UTF-16 stream.
BOM_LITTLE_ENDIAN = b"\xff\xfe"
# The escape has to be "\xff": the former b"\xfe\ff" spelled 0xFE, a form
# feed and the letter "f" (three bytes), so comparing it with the first two
# bytes of a line could never succeed.
BOM_BIG_ENDIAN = b"\xfe\xff"
# Byte order flags used by CSVMemoryMapIterator.
LITTLE_ENDIAN = 0
BIG_ENDIAN = 1
class CSVMemoryMapIterator(compact.Iterator):
    """
    Line iterator that turns the bytes of an mmap object into text

    mmap objects return raw bytes from readline(). Every line is decoded
    here with the encoding chosen at construction time: utf-8, utf-16,
    utf-32 or one of the explicit -le / -be variants.
    """

    def __init__(self, mmap_obj, encoding):
        self.__mmap_obj = mmap_obj
        self.__encoding = encoding
        self.__count = 0
        self.__endian = LITTLE_ENDIAN
        # (encoding, bytes dropped from the head of every line after the
        # first one, byte order).
        # little endian utf-16 layout:  ..\r\x00\n | \x00\x..
        # little endian utf-32 layout:  \r\x00\x00\x00\n | \x00\x00\x00\x..
        known_encodings = (
            ("utf-8", 0, LITTLE_ENDIAN),
            ("utf-16", 1, LITTLE_ENDIAN),
            ("utf-32", 3, LITTLE_ENDIAN),
            ("utf-32-be", 0, BIG_ENDIAN),
            ("utf-16-be", 0, BIG_ENDIAN),
            ("utf-32-le", 3, LITTLE_ENDIAN),
            ("utf-16-le", 1, LITTLE_ENDIAN),
        )
        for known, leading_zeros, byte_order in known_encodings:
            if encoding == known:
                self.__zeros_left_in_2_row = leading_zeros
                self.__endian = byte_order
                break
        else:
            raise Exception("Encoding %s is not supported" % encoding)

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next decoded line; an empty result ends the iteration"""
        raw = self.__mmap_obj.readline()
        is_first_line = self.__count == 0
        if is_first_line and self.__encoding in ("utf-16", "utf-32"):
            # only the generic encodings consult the byte order mark
            if raw[:2] == BOM_BIG_ENDIAN:
                self.__endian = BIG_ENDIAN
        if self.__endian == LITTLE_ENDIAN:
            if not is_first_line:
                # drop the zero bytes left behind by the previous newline
                raw = raw[self.__zeros_left_in_2_row:]
            raw = raw.rstrip()
        text = raw.decode(self.__encoding)
        self.__count += 1
        if text == "":
            raise StopIteration
        if compact.PY2:
            # python 2 requires utf-8 encoded string for reading
            text = text.encode("utf-8")
        return text
class UTF8Recorder(compact.Iterator):
    """
    Iterate over an encoded stream and hand out each line re-encoded as UTF-8
    """

    def __init__(self, file_handle, encoding):
        self.__file_handle = file_handle
        make_reader = codecs.getreader(encoding)
        self.reader = make_reader(file_handle)

    def close(self):
        """close the wrapped file handle"""
        self.__file_handle.close()

    def __iter__(self):
        return self

    def __next__(self):
        # python 2 requires utf-8 encoded string for reading
        decoded_line = next(self.reader)
        return decoded_line.encode("utf-8")
class CSVSheetReader(SheetReader):
    """csv sheet reader base class; subclasses provide the text source"""

    def __init__(
        self,
        sheet,
        encoding="utf-8",
        auto_detect_float=True,
        ignore_infinity=True,
        auto_detect_int=True,
        auto_detect_datetime=True,
        pep_0515_off=True,
        ignore_nan_text=False,
        default_float_nan=None,
        **keywords
    ):
        # the sheet and the remaining keywords go to the base class
        SheetReader.__init__(self, sheet, **keywords)
        self._encoding = encoding
        # opened lazily by row_iterator()
        self.__file_handle = None
        # switches for the cell conversion done in __convert_cell()
        self.__auto_detect_int = auto_detect_int
        self.__auto_detect_float = auto_detect_float
        self.__auto_detect_datetime = auto_detect_datetime
        self.__ignore_infinity = ignore_infinity
        self.__pep_0515_off = pep_0515_off
        self.__ignore_nan_text = ignore_nan_text
        self.__default_float_nan = default_float_nan

    def get_file_handle(self):
        """return a unicode text source for csv.reader (subclass hook)"""
        raise NotImplementedError("Please implement get_file_handle()")

    def row_iterator(self):
        """obtain the file handle and wrap it in a csv.reader"""
        handle = self.get_file_handle()
        self.__file_handle = handle
        return csv.reader(handle, **self._keywords)

    def column_iterator(self, row):
        """yield the cells of a row; None and "" are passed through as is"""
        for cell in row:
            if compact.PY2:
                # under python 2 the cell is decoded from utf-8 first
                cell = cell.decode("utf-8")
            if cell is not None and cell != "":
                yield self.__convert_cell(cell)
            else:
                yield cell

    def __convert_cell(self, csv_cell_text):
        """try int, then float, then date; fall back to the original text"""
        value = None
        if self.__auto_detect_int:
            value = service.detect_int_value(
                csv_cell_text, self.__pep_0515_off
            )
        if value is None and self.__auto_detect_float:
            value = service.detect_float_value(
                csv_cell_text,
                self.__pep_0515_off,
                ignore_nan_text=self.__ignore_nan_text,
                default_float_nan=self.__default_float_nan,
            )
            is_infinite = value in [float("inf"), float("-inf")]
            if is_infinite and self.__ignore_infinity:
                # discard the float so that later detection steps can run
                value = None
        if value is None and self.__auto_detect_datetime:
            value = service.detect_date_value(csv_cell_text)
        return csv_cell_text if value is None else value

    def close(self):
        """close the file handle if row_iterator() ever obtained one"""
        handle = self.__file_handle
        if handle:
            handle.close()
        # otherwise row_iterator() was never called: nothing was opened
class CSVFileReader(CSVSheetReader):
    """read csv from a physical file"""

    def get_file_handle(self):
        """open the path kept in the sheet payload for csv.reader"""
        if not compact.PY2:
            return open(
                self._native_sheet.payload, "r", encoding=self._encoding
            )
        # python 2: read bytes and let UTF8Recorder do the transcoding
        binary_handle = open(self._native_sheet.payload, "rb")
        return UTF8Recorder(binary_handle, self._encoding)
class CSVinMemoryReader(CSVSheetReader):
    """read csv content that lives in memory"""

    def get_file_handle(self):
        """return a source csv.reader can consume for the current python"""
        if compact.PY2:
            if hasattr(self._native_sheet.payload, "read"):
                # anything readable gets transcoded line by line
                return UTF8Recorder(self._native_sheet.payload, self._encoding)
            return self._native_sheet.payload
        if isinstance(self._native_sheet.payload, compact.BytesIO):
            # A BytesIO payload is read in full and decoded into a StringIO.
            # That conversion costs an extra copy of the whole content.
            raw = self._native_sheet.payload.read()
            return compact.StringIO(raw.decode(self._encoding))
        return self._native_sheet.payload
class CSVBookReader(BookReader):
    """read csv file"""

    def __init__(self):
        BookReader.__init__(self)
        self._file_type = constants.FILE_FORMAT_CSV
        self._file_content = None
        # True once content was loaded through _load_from_stream()
        self.__load_from_memory_flag = False
        self.__line_terminator = constants.DEFAULT_CSV_NEWLINE
        self.__sheet_name = None
        self.__sheet_index = None
        self.__multiple_sheets = False
        # every sheet reader handed out, so close() can release them
        self.__readers = []

    def open(self, file_name, **keywords):
        """open a csv file, or the family of per-sheet files next to it"""
        BookReader.open(self, file_name, **keywords)
        self._native_book = self._load_from_file()

    def open_stream(self, file_stream, multiple_sheets=False, **keywords):
        """open a stream; multiple_sheets enables splitting on separators"""
        BookReader.open_stream(self, file_stream, **keywords)
        self.__multiple_sheets = multiple_sheets
        self._native_book = self._load_from_stream()

    def open_content(self, file_content, **keywords):
        """open in-memory content given as mmap, bytes or text"""
        try:
            import mmap

            encoding = keywords.get("encoding", "utf-8")
            if isinstance(file_content, mmap.mmap):
                # load from mmap
                self.__multiple_sheets = keywords.get("multiple_sheets", False)
                self._file_stream = CSVMemoryMapIterator(
                    file_content, encoding
                )
                self._keywords = keywords
                self._native_book = self._load_from_stream()
            else:
                if compact.PY3_ABOVE:
                    if isinstance(file_content, bytes):
                        file_content = file_content.decode(encoding)
                # else python 2.7 does not care about bytes nor str
                BookReader.open_content(self, file_content, **keywords)
        except ImportError:
            # python 2.6 or Google app engine
            BookReader.open_content(self, file_content, **keywords)

    def read_sheet(self, native_sheet):
        """read one sheet with the reader matching where it was loaded from"""
        if self.__load_from_memory_flag:
            reader = CSVinMemoryReader(native_sheet, **self._keywords)
        else:
            reader = CSVFileReader(native_sheet, **self._keywords)
        self.__readers.append(reader)
        return reader.to_array()

    def close(self):
        """close every sheet reader created by read_sheet()"""
        for reader in self.__readers:
            reader.close()

    def _load_from_stream(self):
        """Load content from memory

        :params stream file_content: the actual file content in memory
        :returns: a book
        """
        self.__load_from_memory_flag = True
        self.__line_terminator = self._keywords.get(
            constants.KEYWORD_LINE_TERMINATOR, self.__line_terminator
        )
        separator = DEFAULT_SHEET_SEPARATOR_FORMATTER % self.__line_terminator
        if self.__multiple_sheets:
            # will be slow for large files
            self._file_stream.seek(0)
            content = self._file_stream.read()
            sheets = content.split(separator)
            named_contents = []
            for sheet in sheets:
                if sheet == "":  # skip empty named sheet
                    continue
                lines = sheet.split(self.__line_terminator)
                # the first line of each chunk carries the sheet name
                result = re.match(constants.SEPARATOR_MATCHER, lines[0])
                new_content = "\n".join(lines[1:])
                new_sheet = NamedContent(
                    result.group(1), compact.StringIO(new_content)
                )
                named_contents.append(new_sheet)
            return named_contents
        else:
            if hasattr(self._file_stream, "seek"):
                self._file_stream.seek(0)
            return [NamedContent(self._file_type, self._file_stream)]

    def _load_from_file(self):
        """Load content from a file

        Files named <root><sep><sheet name><sep><index><extension> next to
        the requested file are treated as the sheets of one book and are
        returned in index order. When none exist, the file itself is the
        only sheet.

        :params str filename: an accessible file path
        :returns: a book
        """
        self.__line_terminator = self._keywords.get(
            constants.KEYWORD_LINE_TERMINATOR, self.__line_terminator
        )
        root, extension = os.path.splitext(self._file_name)
        separator = constants.DEFAULT_MULTI_CSV_SEPARATOR
        filepattern = "%s%s*%s*%s" % (root, separator, separator, extension)
        filelist = glob.glob(filepattern)
        if len(filelist) == 0:
            file_parts = os.path.split(self._file_name)
            return [NamedContent(file_parts[-1], self._file_name)]

        # The path is literal text, not a pattern: escape it so that
        # characters such as "+", "(" or "\" cannot break the match.
        matcher = "%s%s(.*)%s(.*)%s" % (
            re.escape(root),
            re.escape(separator),
            re.escape(separator),
            re.escape(extension),
        )

        def sheet_order(entry):
            # Order by the numeric value of the index so that sheet 10
            # follows sheet 9; anything non-numeric sorts last, as text.
            index = entry[1]
            if index.isdigit():
                return (0, int(index), index)
            return (1, 0, index)

        tmp_file_list = []
        for filen in filelist:
            result = re.match(matcher, filen)
            tmp_file_list.append((result.group(1), result.group(2), filen))
        return [
            NamedContent(lsheetname, filen)
            for lsheetname, _index, filen in sorted(
                tmp_file_list, key=sheet_order
            )
        ]

View File

@ -4,69 +4,56 @@
The lower level csvz file format handler.
:copyright: (c) 2014-2017 by Onni Software Ltd.
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
import zipfile
from io import BytesIO
from pyexcel_io._compact import StringIO, PY2
from pyexcel_io.book import BookReader
from pyexcel_io.constants import FILE_FORMAT_CSVZ
from .csvr import CSVinMemoryReader, NamedContent
import chardet
from pyexcel_io import constants
from pyexcel_io.sheet import NamedContent
from pyexcel_io._compact import StringIO
from pyexcel_io.readers.csv_sheet import CSVinMemoryReader
from pyexcel_io.plugin_api.abstract_reader import IReader
class CSVZipBookReader(BookReader):
"""csvz reader
Read zipped csv file that was zipped up by pyexcel-io. It support
single csv file and multiple csv files.
"""
def __init__(self):
BookReader.__init__(self)
self._file_type = FILE_FORMAT_CSVZ
self.zipfile = None
def open(self, file_name, **keywords):
BookReader.open(self, file_name, **keywords)
self._native_book = self._load_from_file_alike_object(self._file_name)
def open_stream(self, file_stream, **keywords):
BookReader.open_stream(self, file_stream, **keywords)
self._native_book = self._load_from_file_alike_object(
self._file_stream
)
def read_sheet(self, native_sheet):
content = self.zipfile.read(native_sheet.payload)
if PY2:
sheet = StringIO(content)
else:
sheet = StringIO(content.decode("utf-8"))
reader = CSVinMemoryReader(
NamedContent(native_sheet.name, sheet), **self._keywords
)
return reader.to_array()
def close(self):
if self.zipfile:
self.zipfile.close()
def _load_from_file_alike_object(self, file_alike_object):
class FileReader(IReader):
def __init__(self, file_alike_object, file_type, **keywords):
self.content_array = []
try:
self.zipfile = zipfile.ZipFile(file_alike_object, "r")
sheets = [
NamedContent(_get_sheet_name(name), name)
for name in self.zipfile.namelist()
]
return sheets
self.content_array = sheets
self.keywords = keywords
if file_type == constants.FILE_FORMAT_TSVZ:
self.keywords["dialect"] = constants.KEYWORD_TSV_DIALECT
except zipfile.BadZipfile:
print("StringIO instance was passed by any chance?")
raise
def close(self):
if self.zipfile:
self.zipfile.close()
def read_sheet(self, index):
name = self.content_array[index].name
content = self.zipfile.read(self.content_array[index].payload)
encoding_guess = chardet.detect(content)
sheet = StringIO(content.decode(encoding_guess["encoding"]))
return CSVinMemoryReader(NamedContent(name, sheet), **self.keywords)
class ContentReader(FileReader):
def __init__(self, file_content, file_type, **keywords):
io = BytesIO(file_content)
super().__init__(io, file_type, **keywords)
def _get_sheet_name(filename):
len_of_a_dot = 1

View File

@ -1,27 +0,0 @@
"""
pyexcel_io.readers.tsv
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The lower level tsv file format handler.
:copyright: (c) 2014-2017 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
import pyexcel_io.constants as constants
from .csvr import CSVBookReader
class TSVBookReader(CSVBookReader):
    """Read tab separated values"""

    def __init__(self):
        CSVBookReader.__init__(self)
        self._file_type = constants.FILE_FORMAT_TSV

    def open(self, file_name, **keywords):
        """open a tsv file; the dialect keyword is always set to tsv"""
        tsv_keywords = dict(keywords, dialect=constants.KEYWORD_TSV_DIALECT)
        CSVBookReader.open(self, file_name, **tsv_keywords)

    def open_stream(self, file_content, **keywords):
        """open a tsv stream; the dialect keyword is always set to tsv"""
        tsv_keywords = dict(keywords, dialect=constants.KEYWORD_TSV_DIALECT)
        CSVBookReader.open_stream(self, file_content, **tsv_keywords)

View File

@ -1,31 +0,0 @@
"""
pyexcel_io.fileformat.tsvz
~~~~~~~~~~~~~~~~~~~~~~~~~~
The lower level tsvz file format handler.
:copyright: (c) 2014-2017 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
from pyexcel_io.constants import FILE_FORMAT_TSVZ, KEYWORD_TSV_DIALECT
from .csvz import CSVZipBookReader
class TSVZipBookReader(CSVZipBookReader):
    """read zipped tab separated value file

    it supports single tsv file and multiple tsv files
    """

    def __init__(self):
        CSVZipBookReader.__init__(self)
        self._file_type = FILE_FORMAT_TSVZ

    def open(self, file_name, **keywords):
        """open a tsvz file; the dialect keyword is always set to tsv"""
        tsv_keywords = dict(keywords, dialect=KEYWORD_TSV_DIALECT)
        CSVZipBookReader.open(self, file_name, **tsv_keywords)

    def open_stream(self, file_content, **keywords):
        """open a tsvz stream; the dialect keyword is always set to tsv"""
        tsv_keywords = dict(keywords, dialect=KEYWORD_TSV_DIALECT)
        CSVZipBookReader.open_stream(self, file_content, **tsv_keywords)

View File

@ -4,16 +4,14 @@
provide service code to downstream projects
:copyright: (c) 2014-2017 by Onni Software Ltd.
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
import re
import math
import datetime
from pyexcel_io._compact import PY2
from pyexcel_io import constants
from pyexcel_io import exceptions
from pyexcel_io import constants, exceptions
def has_no_digits_in_float(value):
@ -175,13 +173,10 @@ ODS_WRITE_FORMAT_COVERSION = {
datetime.date: "date",
datetime.time: "time",
datetime.timedelta: "timedelta",
datetime.datetime: "datetime",
bool: "boolean",
}
if PY2:
ODS_WRITE_FORMAT_COVERSION[unicode] = "string" # noqa: F821
ODS_WRITE_FORMAT_COVERSION[long] = "throw_exception" # noqa: F821
VALUE_CONVERTERS = {
"float": float_value,
@ -234,7 +229,7 @@ ODS_VALUE_CONVERTERS = {
"boolean": ods_bool_value,
"timedelta": ods_timedelta_value,
"float": ods_float_value,
"throw_exception": throw_exception
"long": ods_float_value,
}

View File

@ -4,22 +4,13 @@
The io interface to file extensions
:copyright: (c) 2014-2017 by Onni Software Ltd.
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
from pyexcel_io._compact import irange
from pyexcel_io.utils import _index_filter
import pyexcel_io.constants as constants
class NamedContent(object):
"""
Helper class for content that does not have a name
"""
def __init__(self, name, payload):
self.name = name
self.payload = payload
from pyexcel_io.utils import _index_filter
from pyexcel_io._compact import irange
from pyexcel_io.plugin_api import NamedContent # noqa: F401
class SheetReader(object):
@ -38,11 +29,12 @@ class SheetReader(object):
skip_column_func=None,
skip_empty_rows=False,
row_renderer=None,
**keywords
keep_trailing_empty_cells=False,
**deprecated_use_of_keywords_here
):
self._native_sheet = sheet
self._keywords = {}
self._keywords.update(keywords)
self._keywords.update(deprecated_use_of_keywords_here)
self._start_row = start_row
self._row_limit = row_limit
self._start_column = start_column
@ -51,6 +43,7 @@ class SheetReader(object):
self._skip_column = _index_filter
self._skip_empty_rows = skip_empty_rows
self._row_renderer = row_renderer
self.keep_trailing_empty_cells = keep_trailing_empty_cells
if skip_row_func:
self._skip_row = skip_row_func
@ -58,8 +51,7 @@ class SheetReader(object):
self._skip_column = skip_column_func
def to_array(self):
"""2 dimentional representation of the content
"""
"""2 dimentional representation of the content"""
for row_index, row in enumerate(self.row_iterator()):
row_position = self._skip_row(
row_index, self._start_row, self._row_limit
@ -85,10 +77,13 @@ class SheetReader(object):
elif column_position == constants.STOP_ITERATION:
break
tmp_row.append(cell_value)
if cell_value is not None and cell_value != "":
return_row += tmp_row
tmp_row = []
if self.keep_trailing_empty_cells:
return_row.append(cell_value)
else:
tmp_row.append(cell_value)
if cell_value is not None and cell_value != "":
return_row += tmp_row
tmp_row = []
if self._skip_empty_rows and len(return_row) < 1:
# we by-pass next yeild here
# because it is an empty row

View File

@ -4,36 +4,45 @@
utility functions
:copyright: (c) 2014-2017 by Onni Software Ltd.
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
import pyexcel_io.constants as constants
XLS_PLUGIN = "pyexcel-xls"
XLSX_PLUGIN = "pyexcel-xlsx"
ODS_PLUGIN = "pyexcel-ods"
ODS3_PLUGIN = "pyexcel-ods3"
ODSR_PLUGIN = "pyexcel-odsr"
ODSW_PLUGIN = "pyexcel-odsw"
XLSXR_PLUGIN = "pyexcel-xlsxr"
XLSXW_PLUGIN = "pyexcel-xlsxw"
XLSBR_PLUGIN = "pyexcel-xlsbr"
HTMLR_PLUGIN = "pyexcel-htmlr"
PDFR_PLUGIN = "pyexcel-pdfr"
IO_ITSELF = "pyexcel-io"
AVAILABLE_NEW_READERS = {}
AVAILABLE_READERS = {
constants.FILE_FORMAT_CSV: [IO_ITSELF],
constants.FILE_FORMAT_XLS: [XLS_PLUGIN],
constants.FILE_FORMAT_XLSX: [XLS_PLUGIN, XLSX_PLUGIN],
constants.FILE_FORMAT_XLSM: [XLS_PLUGIN, XLSX_PLUGIN],
constants.FILE_FORMAT_ODS: [ODS_PLUGIN, ODS3_PLUGIN],
constants.FILE_FORMAT_CSV: [IO_ITSELF],
constants.FILE_FORMAT_ODS: [ODS_PLUGIN, ODS3_PLUGIN, ODSR_PLUGIN],
constants.FILE_FORMAT_TSV: [IO_ITSELF],
constants.FILE_FORMAT_CSVZ: [IO_ITSELF],
constants.FILE_FORMAT_TSVZ: [IO_ITSELF],
constants.FILE_FORMAT_XLSB: [XLSBR_PLUGIN],
constants.FILE_FORMAT_HTML: [HTMLR_PLUGIN],
constants.FILE_FORMAT_PDF: [PDFR_PLUGIN],
}
AVAILABLE_WRITERS = {
constants.FILE_FORMAT_XLS: [XLS_PLUGIN],
constants.FILE_FORMAT_XLSX: [XLSX_PLUGIN, XLSXW_PLUGIN],
constants.FILE_FORMAT_XLSM: [XLSX_PLUGIN],
constants.FILE_FORMAT_ODS: [ODS_PLUGIN, ODS3_PLUGIN],
constants.FILE_FORMAT_ODS: [ODS_PLUGIN, ODS3_PLUGIN, ODSW_PLUGIN],
constants.FILE_FORMAT_CSV: [IO_ITSELF],
constants.FILE_FORMAT_TSV: [IO_ITSELF],
constants.FILE_FORMAT_CSVZ: [IO_ITSELF],
@ -60,10 +69,10 @@ def is_empty_array(array):
def swap_empty_string_for_none(array):
""" replace empty string fields with None """
"""replace empty string fields with None"""
def swap(value):
""" change empty string to None """
"""change empty string to None"""
if value == "":
return None

51
pyexcel_io/writer.py Normal file
View File

@ -0,0 +1,51 @@
from pyexcel_io.plugins import NEW_WRITERS
class Writer(object):
    """
    Front end that picks a writer plugin for a file type and delegates to it

    Call one of open(), open_content() or open_stream() first, then write()
    and finally close(). The instance is also a context manager that calls
    close() on exit.
    """

    def __init__(self, file_type, library=None):
        self.file_type = file_type
        self.library = library
        # NOTE(review): looks like a typo for "keywords"; it is never read
        # in this class. Kept because it is a public attribute.
        self.keyboards = None
        # if you know which writer class to use, this attribute allows
        # you to set writer class externally. Since there is no
        # so call private field in Python, I am not going to create
        # useless setter and getter functions like Java.
        # in pyexcel, this attribute is mainly used for testing
        self.writer_class = None
        # Stays None until one of the open*() methods succeeds, which lets
        # close() tell whether there is anything to close.
        self.writer = None

    def open(self, file_name, **keywords):
        """create the writer for a file name (plugin location "file")"""
        self._create_writer(file_name, "file", keywords)

    def open_content(self, file_stream, **keywords):
        """create the writer for plugin location "content\""""
        self._create_writer(file_stream, "content", keywords)

    def open_stream(self, file_stream, **keywords):
        """create the writer for a stream (plugin location "memory")"""
        self._create_writer(file_stream, "memory", keywords)

    def _create_writer(self, target, location, keywords):
        """look up the writer class unless one was preset, then build it"""
        if self.writer_class is None:
            self.writer_class = NEW_WRITERS.get_a_plugin(
                self.file_type, library=self.library, location=location
            )
        self.writer = self.writer_class(target, self.file_type, **keywords)

    def write(self, incoming_dict):
        """hand the data to the underlying writer"""
        self.writer.write(incoming_dict)

    def close(self):
        """close the underlying writer, if one was ever opened

        Without the guard a failed open*() inside a ``with`` block would be
        replaced by an AttributeError raised from __exit__.
        """
        if self.writer is not None:
            self.writer.close()

    def __enter__(self):
        return self

    def __exit__(self, a_type, value, traceback):
        self.close()

View File

@ -4,26 +4,24 @@
file writers
:copyright: (c) 2014-2017 by Onni Software Ltd.
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
from pyexcel_io.plugins import IOPluginInfoChain
from pyexcel_io.plugins import IOPluginInfoChainV2
IOPluginInfoChain(__name__).add_a_writer(
relative_plugin_class_path="csvw.CSVBookWriter",
file_types=["csv"],
IOPluginInfoChainV2(__name__).add_a_writer(
relative_plugin_class_path="csv_in_file.CsvFileWriter",
locations=["file", "content"],
file_types=["csv", "tsv"],
stream_type="text",
).add_a_writer(
relative_plugin_class_path="tsv.TSVBookWriter",
file_types=["tsv"],
relative_plugin_class_path="csv_in_memory.CsvMemoryWriter",
locations=["memory"],
file_types=["csv", "tsv"],
stream_type="text",
).add_a_writer(
relative_plugin_class_path="csvz.CSVZipBookWriter",
file_types=["csvz"],
stream_type="binary",
).add_a_writer(
relative_plugin_class_path="tsvz.TSVZipBookWriter",
file_types=["tsvz"],
relative_plugin_class_path="csvz_writer.CsvZipWriter",
locations=["memory", "file", "content"],
file_types=["csvz", "tsvz"],
stream_type="binary",
)

View File

@ -0,0 +1,29 @@
from pyexcel_io import constants
from pyexcel_io.plugin_api import IWriter
from pyexcel_io.writers.csv_sheet import CSVFileWriter
class CsvFileWriter(IWriter):
    """Book level csv/tsv writer that creates one CSVFileWriter per sheet"""

    def __init__(self, file_alike_object, file_type, **keywords):
        if file_type == constants.FILE_FORMAT_TSV:
            # tsv differs from csv only in the dialect given to the writer
            keywords.update(dialect=constants.KEYWORD_TSV_DIALECT)
        self._file_alike_object = file_alike_object
        self._keywords = keywords
        # index handed to the next sheet writer
        self.__index = 0
        # sheet writers that close() still has to close
        self.handlers = []

    def create_sheet(self, name):
        """create, remember and return the writer for the next sheet"""
        sheet_writer = CSVFileWriter(
            self._file_alike_object,
            name,
            sheet_index=self.__index,
            **self._keywords
        )
        self.__index += 1
        self.handlers.append(sheet_writer)
        return sheet_writer

    def close(self):
        """close all sheet writers created so far and forget them"""
        for sheet_writer in self.handlers:
            sheet_writer.close()
        self.handlers = []

View File

@ -0,0 +1,26 @@
from pyexcel_io import constants
from pyexcel_io.plugin_api import IWriter
from pyexcel_io.writers.csv_sheet import CSVMemoryWriter
class CsvMemoryWriter(IWriter):
    """Book level csv/tsv writer that creates one CSVMemoryWriter per sheet"""

    def __init__(self, file_alike_object, file_type, **keywords):
        if file_type == constants.FILE_FORMAT_TSV:
            # tsv differs from csv only in the dialect given to the writer
            keywords.update(dialect=constants.KEYWORD_TSV_DIALECT)
        self._file_alike_object = file_alike_object
        self._keywords = keywords
        # index handed to the next sheet writer
        self.__index = 0

    def create_sheet(self, name):
        """create and return the writer for the next sheet"""
        sheet_writer = CSVMemoryWriter(
            self._file_alike_object,
            name,
            sheet_index=self.__index,
            **self._keywords
        )
        self.__index += 1
        return sheet_writer

    def close(self):
        """nothing is closed here"""
        pass

View File

@ -0,0 +1,98 @@
"""
pyexcel_io.writers.csv_sheet
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The lower level csv file format writer
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
import csv
import pyexcel_io.constants as constants
from pyexcel_io.plugin_api import ISheetWriter
class CSVFileWriter(ISheetWriter):
    """
    csv file writer

    :param filename: path of the book file
    :param name: sheet name; None falls back to the default sheet name
    :param encoding: text encoding of the file that is written
    :param single_sheet_in_book: when true the default sheet name is used
    :param sheet_index: index that becomes part of a per-sheet file name
    :param keywords: passed on to csv.writer
    """

    def __init__(
        self,
        filename,
        name,
        encoding="utf-8",
        single_sheet_in_book=False,
        sheet_index=None,
        **keywords
    ):
        self._encoding = encoding
        self._sheet_name = name
        if self._sheet_name is None or single_sheet_in_book:
            self._sheet_name = constants.DEFAULT_SHEET_NAME
        self._single_sheet_in_book = single_sheet_in_book
        self.__line_terminator = constants.DEFAULT_CSV_NEWLINE
        self._keywords = keywords
        if constants.KEYWORD_LINE_TERMINATOR in keywords:
            self.__line_terminator = keywords.get(
                constants.KEYWORD_LINE_TERMINATOR
            )
        self._sheet_index = sheet_index
        self.file_handle = None
        self._native_book = filename
        # must come last: get_writer() reads the attributes set above
        self.writer = self.get_writer()

    def get_writer(self):
        """open the target file and return a csv.writer bound to it

        A sheet with a non-default name is written to
        <root><sep><sheet name><sep><sheet index><extension>. The default
        sheet goes to the book file itself.
        """
        import os

        if self._sheet_name != constants.DEFAULT_SHEET_NAME:
            # splitext only cuts the final extension, so "./out.csv",
            # "a.b.csv" and names without an extension all work; splitting
            # on every "." mangled such paths.
            root, extension = os.path.splitext(self._native_book)
            file_name = "%s%s%s%s%s%s" % (
                root,
                constants.DEFAULT_MULTI_CSV_SEPARATOR,
                self._sheet_name,  # sheet name
                constants.DEFAULT_MULTI_CSV_SEPARATOR,
                self._sheet_index,  # sheet index
                extension,
            )
        else:
            file_name = self._native_book

        # newline="" leaves line endings to the csv module
        self.file_handle = open(
            file_name, "w", newline="", encoding=self._encoding
        )
        return csv.writer(self.file_handle, **self._keywords)

    def write_row(self, array):
        """
        write a row into the file
        """
        self.writer.writerow(array)

    def close(self):
        """close the file opened by get_writer()"""
        self.file_handle.close()
class CSVMemoryWriter(CSVFileWriter):
    """Write csv to a memory stream"""

    def get_writer(self):
        """bind a csv.writer to the stream, writing a sheet header if needed"""
        self.file_handle = self._native_book
        stream_writer = csv.writer(self.file_handle, **self._keywords)
        if self._single_sheet_in_book:
            return stream_writer
        # several sheets share the stream: start this one with its name line
        header = constants.DEFAULT_CSV_STREAM_FILE_FORMATTER % (
            self._sheet_name,
            "",
        )
        stream_writer.writerow([header])
        return stream_writer

    def close(self):
        """write the closing separator line; the stream itself stays open"""
        # for a single sheet nothing is written on purpose,
        # because the io stream can be used later
        if not self._single_sheet_in_book:
            self.writer.writerow([constants.SEPARATOR_FORMATTER % ""])

View File

@ -1,192 +0,0 @@
"""
pyexcel_io.writers.csvw
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The lower level csv file format writer
:copyright: (c) 2014-2017 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
import csv
import codecs
from pyexcel_io.book import BookWriter
from pyexcel_io.sheet import SheetWriter
import pyexcel_io._compact as compact
import pyexcel_io.constants as constants
class UnicodeWriter(object):
    """
    csv writer that emits its rows to a file handle in a chosen encoding

    Rows are first rendered as UTF-8 into an in-memory queue and are then
    re-encoded into the target encoding.
    """

    def __init__(self, file_handle, encoding="utf-8", **kwds):
        self.stream = file_handle
        self.encoder = codecs.getincrementalencoder(encoding)()
        # rows are rendered into this queue before they reach the stream
        self.queue = compact.StringIO()
        self.writer = csv.writer(self.queue, **kwds)

    def writerow(self, row):
        """write row into the csv file"""
        utf8_cells = [compact.text_type(cell).encode("utf-8") for cell in row]
        self.writer.writerow(utf8_cells)
        # take the UTF-8 output of the queue, re-encode it into the target
        # encoding and pass it to the target stream
        rendered = self.queue.getvalue().decode("utf-8")
        self.stream.write(self.encoder.encode(rendered))
        # empty queue
        self.queue.truncate(0)

    def writerows(self, rows):
        """write multiple rows into csv file"""
        for single_row in rows:
            self.writerow(single_row)
class CSVSheetWriter(SheetWriter):
    """
    Shared state for the csv sheet writers
    """

    def __init__(
        self,
        filename,
        name,
        encoding="utf-8",
        single_sheet_in_book=False,
        sheet_index=None,
        **keywords
    ):
        self._encoding = encoding
        self._single_sheet_in_book = single_sheet_in_book
        self._sheet_index = sheet_index
        # a book with a single sheet has no sheet name
        self._sheet_name = None if single_sheet_in_book else name
        self.__line_terminator = keywords.get(
            constants.KEYWORD_LINE_TERMINATOR, constants.DEFAULT_CSV_NEWLINE
        )
        self.writer = None
        self.file_handle = None
        # base class initialisation comes last, after the attributes above
        SheetWriter.__init__(
            self, filename, self._sheet_name, self._sheet_name, **keywords
        )

    def write_row(self, array):
        """
        write a row into the file
        """
        self.writer.writerow(array)
class CSVFileWriter(CSVSheetWriter):
    """Write csv to a physical file"""

    def close(self):
        """close the file opened by set_sheet_name()"""
        self.file_handle.close()

    def set_sheet_name(self, name):
        """open the file for the sheet and create the matching writer

        A sheet with a non-default name is written to
        <root><sep><sheet name><sep><sheet index><extension>. The default
        sheet goes to the book file itself.
        """
        import os

        if name != constants.DEFAULT_SHEET_NAME:
            # splitext only cuts the final extension, so "./out.csv",
            # "a.b.csv" and names without an extension all work; splitting
            # on every "." mangled such paths.
            root, extension = os.path.splitext(self._native_book)
            file_name = "%s%s%s%s%s%s" % (
                root,
                constants.DEFAULT_MULTI_CSV_SEPARATOR,
                name,  # sheet name
                constants.DEFAULT_MULTI_CSV_SEPARATOR,
                self._sheet_index,  # sheet index
                extension,
            )
        else:
            file_name = self._native_book
        if compact.PY2:
            # python 2: write bytes, UnicodeWriter does the encoding
            self.file_handle = open(file_name, "wb")
            self.writer = UnicodeWriter(
                self.file_handle, encoding=self._encoding, **self._keywords
            )
        else:
            # newline="" leaves line endings to the csv module
            self.file_handle = open(
                file_name, "w", newline="", encoding=self._encoding
            )
            self.writer = csv.writer(self.file_handle, **self._keywords)
class CSVMemoryWriter(CSVSheetWriter):
    """Write csv to a memory stream"""

    def __init__(
        self,
        filename,
        name,
        encoding="utf-8",
        single_sheet_in_book=False,
        sheet_index=None,
        **keywords
    ):
        # plain pass-through to the shared base class
        CSVSheetWriter.__init__(
            self,
            filename,
            name,
            encoding=encoding,
            single_sheet_in_book=single_sheet_in_book,
            sheet_index=sheet_index,
            **keywords
        )

    def set_sheet_name(self, name):
        """bind a writer to the stream, writing a sheet header if needed"""
        self.file_handle = self._native_book
        if compact.PY2:
            self.writer = UnicodeWriter(
                self.file_handle, encoding=self._encoding, **self._keywords
            )
        else:
            self.writer = csv.writer(self.file_handle, **self._keywords)
        if self._single_sheet_in_book:
            return
        # several sheets share the stream: start this one with its name line
        header = constants.DEFAULT_CSV_STREAM_FILE_FORMATTER % (
            self._sheet_name,
            "",
        )
        self.writer.writerow([header])

    def close(self):
        """write the closing separator line; the stream itself stays open"""
        # for a single sheet nothing is written on purpose,
        # because the io stream can be used later
        if not self._single_sheet_in_book:
            self.writer.writerow([constants.SEPARATOR_FORMATTER % ""])
class CSVBookWriter(BookWriter):
    """write csv with unicode support"""

    def __init__(self):
        BookWriter.__init__(self)
        self._file_type = constants.FILE_FORMAT_CSV
        # index handed to the next sheet writer
        self.__index = 0

    def create_sheet(self, name):
        """return a file or memory sheet writer depending on the target"""
        target_is_path = compact.is_string(type(self._file_alike_object))
        sheet_writer_class = (
            CSVFileWriter if target_is_path else CSVMemoryWriter
        )
        sheet_writer = sheet_writer_class(
            self._file_alike_object,
            name,
            sheet_index=self.__index,
            **self._keywords
        )
        self.__index += 1
        return sheet_writer

View File

@ -1,73 +0,0 @@
"""
pyexcel_io.fileformat.csvz
~~~~~~~~~~~~~~~~~~~~~~~~~~~
The lower level csvz file format handler.
:copyright: (c) 2014-2017 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
import zipfile
from pyexcel_io._compact import StringIO, PY2
from pyexcel_io.book import BookWriter
from pyexcel_io.constants import DEFAULT_SHEET_NAME, FILE_FORMAT_CSVZ
from .csvw import CSVSheetWriter, UnicodeWriter
class CSVZipSheetWriter(CSVSheetWriter):
    """Buffer one csv sheet in memory and store it as a zip archive member."""

    def __init__(self, zipfile, sheetname, file_extension, **keywords):
        self.file_extension = file_extension
        # Always forced to False here, whatever the caller passed in.
        keywords.update(single_sheet_in_book=False)
        CSVSheetWriter.__init__(self, zipfile, sheetname, **keywords)

    def set_sheet_name(self, name):
        """Create a fresh in-memory buffer and a csv writer bound to it.

        ``name`` is not used by this method.
        """
        self.content = StringIO()
        if not PY2:
            import csv

            self.writer = csv.writer(self.content, **self._keywords)
            return
        self.writer = UnicodeWriter(
            self.content, encoding=self._encoding, **self._keywords
        )

    def close(self):
        """Store the buffered sheet in the archive, then drop the buffer.

        The member is named ``<self._native_sheet>.<file_extension>``.
        """
        member_name = "%s.%s" % (self._native_sheet, self.file_extension)
        buffer = self.content
        buffer.seek(0)
        payload = buffer.read()
        self._native_book.writestr(member_name, payload)
        buffer.close()
class CSVZipBookWriter(BookWriter):
    """
    csvz writer

    Storing csv files as a csvz archive saves disk space. pyexcel-io can
    unzip the archive for you, or any other unzip software can be used.
    """

    def __init__(self):
        BookWriter.__init__(self)
        self._file_type = FILE_FORMAT_CSVZ
        # Set by open(); None until then.
        self.zipfile = None

    def open(self, file_name, **keywords):
        """Run the base-class open, then create the deflate-compressed archive."""
        BookWriter.open(self, file_name, **keywords)
        self.zipfile = zipfile.ZipFile(
            file_name, mode="w", compression=zipfile.ZIP_DEFLATED
        )

    def create_sheet(self, name):
        """Return a sheet writer; a ``None`` name becomes DEFAULT_SHEET_NAME."""
        sheet_name = DEFAULT_SHEET_NAME if name is None else name
        # The first three characters of the file type are used as the
        # extension of each archive member.
        member_extension = self._file_type[:3]
        return CSVZipSheetWriter(
            self.zipfile, sheet_name, member_extension, **self._keywords
        )

    def close(self):
        """Close the zip archive."""
        self.zipfile.close()

View File

@ -0,0 +1,32 @@
"""
pyexcel_io.fileformat.csvz_sheet
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The lower level csvz file format handler.
:copyright: (c) 2014-2022 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
import csv
from pyexcel_io._compact import StringIO
from pyexcel_io.writers.csv_sheet import CSVFileWriter
class CSVZipSheetWriter(CSVFileWriter):
    """Buffer one csv sheet in memory and store it as a zip archive member."""

    def __init__(self, zipfile, sheetname, file_extension, **keywords):
        self.file_extension = file_extension
        # Always forced to False here, whatever the caller passed in.
        keywords.update(single_sheet_in_book=False)
        # The buffer is created before the parent constructor runs;
        # presumably the parent calls get_writer() during initialisation
        # -- confirm in CSVFileWriter.
        self.content = StringIO()
        super().__init__(zipfile, sheetname, **keywords)

    def get_writer(self):
        """Return a csv writer bound to the in-memory buffer."""
        buffer = self.content
        return csv.writer(buffer, **self._keywords)

    def close(self):
        """Store the buffered sheet in the archive, then drop the buffer.

        The member is named ``<self._sheet_name>.<file_extension>``.
        """
        member_name = "%s.%s" % (self._sheet_name, self.file_extension)
        buffer = self.content
        buffer.seek(0)
        payload = buffer.read()
        self._native_book.writestr(member_name, payload)
        buffer.close()

View File

@ -0,0 +1,35 @@
import zipfile
from pyexcel_io import constants
from pyexcel_io.plugin_api import IWriter
from pyexcel_io.writers.csvz_sheet import CSVZipSheetWriter
class CsvZipWriter(IWriter):
    """
    csvz writer

    Storing csv files as a csvz archive saves disk space. pyexcel-io can
    unzip the archive for you, or any other unzip software can be used.
    """

    def __init__(self, file_name, file_type, **keywords):
        self._file_type = file_type
        self.zipfile = zipfile.ZipFile(
            file_name, mode="w", compression=zipfile.ZIP_DEFLATED
        )
        # tsvz output reuses the csv machinery with the tsv dialect forced.
        if file_type == constants.FILE_FORMAT_TSVZ:
            keywords["dialect"] = constants.KEYWORD_TSV_DIALECT
        self._keywords = keywords

    def create_sheet(self, name):
        """Return a sheet writer; a ``None`` name becomes DEFAULT_SHEET_NAME."""
        sheet_name = constants.DEFAULT_SHEET_NAME if name is None else name
        # The first three characters of the file type are used as the
        # extension of each archive member.
        member_extension = self._file_type[:3]
        return CSVZipSheetWriter(
            self.zipfile, sheet_name, member_extension, **self._keywords
        )

    def close(self):
        """Close the zip archive if one is held."""
        archive = self.zipfile
        if not archive:
            return
        archive.close()

View File

@ -1,23 +0,0 @@
"""
pyexcel_io.fileformat.tsv
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The lower level tsv file format handler.
:copyright: (c) 2014-2017 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
import pyexcel_io.constants as constants
from .csvw import CSVBookWriter
class TSVBookWriter(CSVBookWriter):
    """csv book writer preconfigured for tab separated values"""

    def __init__(self):
        CSVBookWriter.__init__(self)
        # Replace the file type assigned by the parent constructor.
        self._file_type = constants.FILE_FORMAT_TSV

    def open(self, file_name, **keywords):
        """Open ``file_name`` through the csv book writer, forcing the tsv dialect.

        Any ``dialect`` supplied by the caller is overwritten.
        """
        keywords.update(dialect=constants.KEYWORD_TSV_DIALECT)
        CSVBookWriter.open(self, file_name, **keywords)

View File

@ -1,27 +0,0 @@
"""
pyexcel_io.fileformat.tsvz
~~~~~~~~~~~~~~~~~~~~~~~~~~
The lower level tsvz file format handler.
:copyright: (c) 2014-2017 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
from pyexcel_io.constants import FILE_FORMAT_TSVZ, KEYWORD_TSV_DIALECT
from .csvz import CSVZipBookWriter
class TSVZipBookWriter(CSVZipBookWriter):
    """Write a zipped tsv file.

    It is similar to CSVZipBookWriter, but supports tab separated values.
    """

    def __init__(self):
        CSVZipBookWriter.__init__(self)
        # Replace the file type assigned by the parent constructor.
        self._file_type = FILE_FORMAT_TSVZ

    def open(self, file_name, **keywords):
        """Open ``file_name`` through the csvz book writer, forcing the tsv dialect.

        Any ``dialect`` supplied by the caller is overwritten.
        """
        keywords.update(dialect=KEYWORD_TSV_DIALECT)
        CSVZipBookWriter.open(self, file_name, **keywords)

View File

@ -1,2 +1 @@
ordereddict;python_version<"2.7"
lml>=0.0.4

View File

@ -1,5 +1 @@
.
https://github.com/chfw/lml/archive/master.zip
https://github.com/pyexcel/pyexcel/archive/master.zip
https://github.com/pyexcel/pyexcel-xls/archive/master.zip

129
setup.py
View File

@ -1,6 +1,9 @@
#!/usr/bin/env python3
# Template by pypi-mobans
"""
Template by pypi-mobans
"""
import os
import sys
import codecs
@ -22,84 +25,86 @@ PY33 = sys.version_info < (3, 4)
try:
lc = locale.getlocale()
pf = platform.system()
if pf != 'Windows' and lc == (None, None):
locale.setlocale(locale.LC_ALL, 'C.UTF-8')
if pf != "Windows" and lc == (None, None):
locale.setlocale(locale.LC_ALL, "C.UTF-8")
except (ValueError, UnicodeError, locale.Error):
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
locale.setlocale(locale.LC_ALL, "en_US.UTF-8")
NAME = 'pyexcel-io'
AUTHOR = 'C.W.'
VERSION = '0.5.13'
EMAIL = 'wangc_2011@hotmail.com'
LICENSE = 'New BSD'
NAME = "pyexcel-io"
AUTHOR = "C.W."
VERSION = "0.6.6"
EMAIL = "info@pyexcel.org"
LICENSE = "New BSD"
DESCRIPTION = (
'A python library to read and write structured data in csv, zipped csv' +
'format and to/from databases'
"A python library to read and write structured data in csv, zipped csv" +
"format and to/from databases"
)
URL = 'https://github.com/pyexcel/pyexcel-io'
DOWNLOAD_URL = '%s/archive/0.5.13.tar.gz' % URL
FILES = ['README.rst', 'CHANGELOG.rst']
URL = "https://github.com/pyexcel/pyexcel-io"
DOWNLOAD_URL = "%s/archive/0.6.6.tar.gz" % URL
FILES = ["README.rst", "CHANGELOG.rst"]
KEYWORDS = [
'python',
'API',
'tsv',
'tsvz',
'csv',
'csvz',
'django',
'sqlalchemy',
"python",
"API",
"tsv",
"tsvz",
"csv",
"csvz",
"django",
"sqlalchemy",
]
CLASSIFIERS = [
'Topic :: Software Development :: Libraries',
'Programming Language :: Python',
'Intended Audience :: Developers',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
"Topic :: Software Development :: Libraries",
"Programming Language :: Python",
"Intended Audience :: Developers",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
'License :: OSI Approved :: BSD License',
'Programming Language :: Python :: Implementation :: PyPy'
]
PYTHON_REQUIRES = ">=3.6"
INSTALL_REQUIRES = [
'lml>=0.0.4',
"lml>=0.0.4",
]
SETUP_COMMANDS = {}
if PY26:
INSTALL_REQUIRES.append('ordereddict')
PACKAGES = find_packages(exclude=['ez_setup', 'examples', 'tests'])
PACKAGES = find_packages(exclude=["ez_setup", "examples", "tests", "tests.*"])
EXTRAS_REQUIRE = {
'xls': ['pyexcel-xls>=0.5.0'],
'xlsx': ['pyexcel-xlsx>=0.5.0'],
'ods': ['pyexcel-ods3>=0.5.0'],
"xls": ['pyexcel-xls>=0.6.0'],
"xlsx": ['pyexcel-xlsx>=0.6.0'],
"ods": ['pyexcel-ods3>=0.6.0'],
}
# You do not need to read beyond this line
PUBLISH_COMMAND = '{0} setup.py sdist bdist_wheel upload -r pypi'.format(
sys.executable)
GS_COMMAND = ('gs pyexcel-io v0.5.13 ' +
"Find 0.5.13 in changelog for more details")
NO_GS_MESSAGE = ('Automatic github release is disabled. ' +
'Please install gease to enable it.')
PUBLISH_COMMAND = "{0} setup.py sdist bdist_wheel upload -r pypi".format(sys.executable)
HERE = os.path.abspath(os.path.dirname(__file__))
GS_COMMAND = ("gease pyexcel-io v0.6.6 " +
"Find 0.6.6 in changelog for more details")
NO_GS_MESSAGE = ("Automatic github release is disabled. " +
"Please install gease to enable it.")
UPLOAD_FAILED_MSG = (
'Upload failed. please run "%s" yourself.' % PUBLISH_COMMAND)
HERE = os.path.abspath(os.path.dirname(__file__))
class PublishCommand(Command):
"""Support setup.py upload."""
description = 'Build and publish the package on github and pypi'
description = "Build and publish the package on github and pypi"
user_options = []
@staticmethod
def status(s):
"""Prints things in bold."""
print('\033[1m{0}\033[0m'.format(s))
print("\033[1m{0}\033[0m".format(s))
def initialize_options(self):
pass
@ -109,14 +114,14 @@ class PublishCommand(Command):
def run(self):
try:
self.status('Removing previous builds...')
rmtree(os.path.join(HERE, 'dist'))
rmtree(os.path.join(HERE, 'build'))
rmtree(os.path.join(HERE, 'pyexcel_io.egg-info'))
self.status("Removing previous builds...")
rmtree(os.path.join(HERE, "dist"))
rmtree(os.path.join(HERE, "build"))
rmtree(os.path.join(HERE, "pyexcel_io.egg-info"))
except OSError:
pass
self.status('Building Source and Wheel (universal) distribution...')
self.status("Building Source and Wheel (universal) distribution...")
run_status = True
if has_gease():
run_status = os.system(GS_COMMAND) == 0
@ -124,16 +129,15 @@ class PublishCommand(Command):
self.status(NO_GS_MESSAGE)
if run_status:
if os.system(PUBLISH_COMMAND) != 0:
self.status(UPLOAD_FAILED_MSG % PUBLISH_COMMAND)
self.status(UPLOAD_FAILED_MSG)
sys.exit()
SETUP_COMMANDS.update({
'publish': PublishCommand
"publish": PublishCommand
})
def has_gease():
"""
test if github release command is installed
@ -159,7 +163,7 @@ def read_files(*files):
def read(afile):
"""Read a file into setup"""
the_relative_file = os.path.join(HERE, afile)
with codecs.open(the_relative_file, 'r', 'utf-8') as opened_file:
with codecs.open(the_relative_file, "r", "utf-8") as opened_file:
content = filter_out_test_code(opened_file)
content = "".join(list(content))
return content
@ -168,11 +172,11 @@ def read(afile):
def filter_out_test_code(file_handle):
found_test_code = False
for line in file_handle.readlines():
if line.startswith('.. testcode:'):
if line.startswith(".. testcode:"):
found_test_code = True
continue
if found_test_code is True:
if line.startswith(' '):
if line.startswith(" "):
continue
else:
empty_line = line.strip()
@ -182,14 +186,14 @@ def filter_out_test_code(file_handle):
found_test_code = False
yield line
else:
for keyword in ['|version|', '|today|']:
for keyword in ["|version|", "|today|"]:
if keyword in line:
break
else:
yield line
if __name__ == '__main__':
if __name__ == "__main__":
setup(
test_suite="tests",
name=NAME,
@ -202,8 +206,9 @@ if __name__ == '__main__':
long_description=read_files(*FILES),
license=LICENSE,
keywords=KEYWORDS,
python_requires=PYTHON_REQUIRES,
extras_require=EXTRAS_REQUIRE,
tests_require=['nose'],
tests_require=["nose"],
install_requires=INSTALL_REQUIRES,
packages=PACKAGES,
include_package_data=True,

View File

@ -1,2 +1,2 @@
pip freeze
nosetests --with-coverage --cover-package pyexcel_io --cover-package tests tests --with-doctest --doctest-extension=.rst README.rst docs/source pyexcel_io && flake8 . --exclude=.moban.d,docs --builtins=unicode,xrange,long
nosetests --with-coverage --cover-package pyexcel_io --cover-package tests tests --with-doctest --doctest-extension=.rst README.rst docs/source pyexcel_io

View File

@ -1,2 +1,3 @@
#/bin/bash
pip freeze
nosetests --with-coverage --cover-package pyexcel_io --cover-package tests tests --with-doctest --doctest-extension=.rst README.rst docs/source pyexcel_io && flake8 . --exclude=.moban.d,docs --builtins=unicode,xrange,long
nosetests --with-coverage --cover-package pyexcel_io --cover-package tests tests --with-doctest --doctest-extension=.rst README.rst docs/source pyexcel_io

1
tests/__init__.py Normal file
View File

@ -0,0 +1 @@
# needed for Python 2.7, python setup.py test to discover tests directory

View File

@ -3,6 +3,13 @@ mock;python_version<"3"
codecov
coverage
flake8
black
isort
collective.checkdocs
pygments
moban
moban_jinja2_github
pyexcel
pyexcel-xls==0.5.9
SQLAlchemy
pyexcel>=0.2.0
pyexcel-xls>=0.1.0
pyexcel-xlsxw

View File

@ -1,6 +1,7 @@
from pyexcel_io.sheet import SheetReader, SheetWriter, NamedContent
from pyexcel_io.book import BookWriter
from pyexcel_io.sheet import SheetReader, SheetWriter, NamedContent
from pyexcel_io.utils import is_empty_array
from nose.tools import raises

View File

@ -1,5 +1,12 @@
from pyexcel_io.book import (
BookReader,
BookWriter,
RWInterface,
_convert_content_to_stream,
)
from pyexcel_io._compact import BytesIO, StringIO
from nose.tools import raises
from pyexcel_io.book import RWInterface, BookReader, BookWriter
@raises(NotImplementedError)
@ -30,3 +37,15 @@ def test_book_reader_open_stream():
def test_book_writer():
writer = BookWriter()
writer.open_stream("a string")
def test_convert_to_bytes_stream():
file_content = b"test"
stream = _convert_content_to_stream(file_content, "csv")
assert isinstance(stream, StringIO)
def test_convert_to_string_stream():
file_content = "test"
stream = _convert_content_to_stream(file_content, "csvz")
assert isinstance(stream, BytesIO)

View File

@ -1,19 +1,20 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
from unittest import TestCase
from textwrap import dedent
from nose.tools import raises, eq_
from unittest import TestCase
import pyexcel_io.manager as manager
from pyexcel_io import get_data
from pyexcel_io.sheet import NamedContent
from pyexcel_io.readers.csvr import (
CSVSheetReader,
from pyexcel_io.reader import EncapsulatedSheetReader
from pyexcel_io._compact import BytesIO, StringIO
from pyexcel_io.readers.csv_sheet import (
CSVFileReader,
CSVSheetReader,
CSVinMemoryReader,
)
from pyexcel_io.writers.csvw import CSVFileWriter, CSVMemoryWriter
from pyexcel_io._compact import BytesIO, PY2, StringIO
from pyexcel_io.writers.csv_sheet import CSVFileWriter, CSVMemoryWriter
from nose.tools import eq_, raises
class TestReaders(TestCase):
@ -32,7 +33,9 @@ class TestReaders(TestCase):
sheet.get_file_handle()
def test_sheet_file_reader(self):
r = CSVFileReader(NamedContent(self.file_type, self.test_file))
r = EncapsulatedSheetReader(
CSVFileReader(NamedContent(self.file_type, self.test_file))
)
result = list(r.to_array())
self.assertEqual(result, self.expected_data)
@ -41,7 +44,10 @@ class TestReaders(TestCase):
with open(self.test_file, "r") as f:
io.write(f.read())
io.seek(0)
r = CSVinMemoryReader(NamedContent(self.file_type, io))
r = EncapsulatedSheetReader(
CSVinMemoryReader(NamedContent(self.file_type, io))
)
result = list(r.to_array())
self.assertEqual(result, self.expected_data)
@ -108,20 +114,37 @@ class TestNonUniformCSV(TestCase):
f.write(",".join(row) + "\n")
def test_sheet_file_reader(self):
r = CSVFileReader(NamedContent(self.file_type, self.test_file))
r = EncapsulatedSheetReader(
CSVFileReader(NamedContent(self.file_type, self.test_file))
)
result = list(r.to_array())
self.assertEqual(result, [[1], [4, 5, 6], ["", 7]])
def test_sheet_file_reader_with_trailing_empty_cells(self):
r = EncapsulatedSheetReader(
CSVFileReader(NamedContent(self.file_type, self.test_file)),
keep_trailing_empty_cells=True,
)
result = list(r.to_array())
self.assertEqual(result, [[1], [4, 5, 6, "", ""], ["", 7]])
def test_get_data_with_trailing_empty_cells(self):
result = get_data(self.test_file, keep_trailing_empty_cells=True)
self.assertEqual(
result[self.test_file], [[1], [4, 5, 6, "", ""], ["", 7]]
)
def tearDown(self):
os.unlink(self.test_file)
def test_utf16_decoding():
test_file = os.path.join("tests", "fixtures", "csv-encoding-utf16.csv")
reader = CSVFileReader(NamedContent("csv", test_file), encoding="utf-16")
reader = EncapsulatedSheetReader(
CSVFileReader(NamedContent("csv", test_file), encoding="utf-16")
)
content = list(reader.to_array())
if PY2:
content[0] = [s.encode("utf-8") for s in content[0]]
expected = [["Äkkilähdöt", "Matkakirjoituksia", "Matkatoimistot"]]
eq_(content, expected)
@ -136,8 +159,6 @@ def test_utf16_encoding():
writer.close()
with open(test_file, "rb") as f:
actual = f.read().decode("utf-16")
if PY2:
actual = actual.encode("utf-8")
eq_(actual, "Äkkilähdöt,Matkakirjoituksia,Matkatoimistot\n")
os.unlink(test_file)
@ -145,12 +166,11 @@ def test_utf16_encoding():
def test_utf16_memory_decoding():
test_content = u"Äkkilähdöt,Matkakirjoituksia,Matkatoimistot"
test_content = BytesIO(test_content.encode("utf-16"))
reader = CSVinMemoryReader(
NamedContent("csv", test_content), encoding="utf-16"
reader = EncapsulatedSheetReader(
CSVinMemoryReader(NamedContent("csv", test_content), encoding="utf-16")
)
content = list(reader.to_array())
if PY2:
content[0] = [s.encode("utf-8") for s in content[0]]
expected = [["Äkkilähdöt", "Matkakirjoituksia", "Matkatoimistot"]]
eq_(content, expected)
@ -167,6 +187,4 @@ def test_utf16_memory_encoding():
)
writer.write_array(content)
actual = io.getvalue()
if PY2:
actual = actual.decode("utf-16")
eq_(actual, u"Äkkilähdöt,Matkakirjoituksia,Matkatoimistot\n")

View File

@ -1,21 +1,23 @@
from nose.tools import raises, eq_
from pyexcel_io import save_data
from pyexcel_io.reader import EncapsulatedSheetReader
from pyexcel_io._compact import OrderedDict
from pyexcel_io.constants import DB_DJANGO
from pyexcel_io.database.common import (
DjangoModelImporter,
DjangoModelImportAdapter,
DjangoModelExporter,
DjangoModelImporter,
DjangoModelExportAdapter,
)
from pyexcel_io.database.importers.django import (
DjangoModelWriter,
DjangoBookWriter,
DjangoModelImportAdapter,
)
from pyexcel_io.database.exporters.django import (
DjangoModelReader,
DjangoBookReader,
DjangoModelReader,
)
from pyexcel_io.database.importers.django import (
DjangoBookWriter,
DjangoModelWriter,
)
from nose.tools import eq_, raises
class Package:
@ -156,10 +158,13 @@ class TestSheet:
writer = DjangoModelWriter(None, adapter)
writer.write_array(self.data[1:])
writer.close()
assert model.objects.objs == [
{"Y": 2, "X": 2, "Z": 3},
{"Y": 5, "X": 5, "Z": 6},
]
eq_(
model.objects.objs,
[
{"Y": 2, "X": 2, "Z": 3},
{"Y": 5, "X": 5, "Z": 6},
],
)
def test_sheet_save_to_django_model_skip_me(self):
model = FakeDjangoModel()
@ -176,7 +181,7 @@ class TestSheet:
writer = DjangoModelWriter(None, adapter)
writer.write_array(self.data[1:])
writer.close()
assert model.objects.objs == [{"Y": 2, "X": 1, "Z": 3}]
eq_(model.objects.objs, [{"Y": 2, "X": 1, "Z": 3}])
def test_load_sheet_from_django_model(self):
model = FakeDjangoModel()
@ -209,7 +214,9 @@ class TestSheet:
return [str(element) for element in row]
# the key point of this test case
reader = DjangoModelReader(model, row_renderer=row_renderer)
reader = EncapsulatedSheetReader(
DjangoModelReader(model), row_renderer=row_renderer
)
data = reader.to_array()
expected = [["X", "Y", "Z"], ["1", "2", "3"], ["4", "5", "6"]]
eq_(list(data), expected)
@ -237,6 +244,18 @@ class TestSheet:
writer.close()
eq_(model.objects.objs, self.result)
def test_jumping_columns(self):
data2 = [["D", "A", "B", "C"], [1, 1, 2, 3], [10, 4, 5, 6]]
mapdict = {"C": "Z", "A": "X", "B": "Y"}
model = FakeDjangoModel()
adapter = DjangoModelImportAdapter(model)
adapter.column_names = data2[0]
adapter.column_name_mapping_dict = mapdict
writer = DjangoModelWriter(None, adapter)
writer.write_array(data2[1:])
writer.close()
eq_(model.objects.objs, self.result)
def test_empty_model(self):
model = FakeDjangoModel()
reader = DjangoModelReader(model)
@ -279,8 +298,7 @@ class TestMultipleModels:
adapter1.get_name(): self.content["Sheet1"][1:],
adapter2.get_name(): self.content["Sheet2"][1:],
}
writer = DjangoBookWriter()
writer.open_content(importer, batch_size=sample_size)
writer = DjangoBookWriter(importer, "django", batch_size=sample_size)
writer.write(to_store)
writer.close()
assert model1.objects.objs == self.result1
@ -302,8 +320,9 @@ class TestMultipleModels:
adapter1.get_name(): self.content["Sheet1"][1:],
adapter2.get_name(): self.content["Sheet2"][1:],
}
writer = DjangoBookWriter()
writer.open_content(importer, batch_size=sample_size, bulk_save=False)
writer = DjangoBookWriter(
importer, "django", batch_size=sample_size, bulk_save=False
)
writer.write(to_store)
writer.close()
assert model1.objects.objs == []
@ -334,12 +353,11 @@ class TestMultipleModels:
adapter2 = DjangoModelExportAdapter(model2)
exporter.append(adapter1)
exporter.append(adapter2)
reader = DjangoBookReader()
reader.open_content(exporter)
data = reader.read_all()
for key in data.keys():
data[key] = list(data[key])
assert data == self.content
reader = DjangoBookReader(exporter, "django")
result = read_all(reader)
for key in result:
result[key] = list(result[key])
eq_(result, self.content)
@raises(Exception)
def test_special_case_where_only_one_model_used(self):
@ -353,28 +371,6 @@ class TestMultipleModels:
"Sheet2": self.content["Sheet2"][1:],
}
save_data(importer, to_store, file_type=DB_DJANGO)
assert model1.objects.objs == self.result1
model1._meta.model_name = "Sheet1"
model1._meta.update(["X", "Y", "Z"])
exporter = DjangoModelExporter()
adapter = DjangoModelExportAdapter(model1)
exporter.append(adapter)
reader = DjangoBookReader()
reader.open_content(exporter)
data = reader.read_all()
assert list(data["Sheet1"]) == self.content["Sheet1"]
@raises(TypeError)
def test_not_implemented_method():
reader = DjangoBookReader()
reader.open("afile")
@raises(TypeError)
def test_not_implemented_method_2():
reader = DjangoBookReader()
reader.open_stream("afile")
class TestFilter:
@ -393,25 +389,33 @@ class TestFilter:
self.model._meta.update(["X", "Y", "Z"])
def test_load_sheet_from_django_model_with_filter(self):
reader = DjangoModelReader(self.model, start_row=0, row_limit=2)
reader = EncapsulatedSheetReader(
DjangoModelReader(self.model), start_row=0, row_limit=2
)
data = reader.to_array()
expected = [["X", "Y", "Z"], [1, 2, 3]]
eq_(list(data), expected)
def test_load_sheet_from_django_model_with_filter_1(self):
reader = DjangoModelReader(self.model, start_row=1, row_limit=3)
reader = EncapsulatedSheetReader(
DjangoModelReader(self.model), start_row=1, row_limit=3
)
data = reader.to_array()
expected = [[1, 2, 3], [4, 5, 6]]
eq_(list(data), expected)
def test_load_sheet_from_django_model_with_filter_2(self):
reader = DjangoModelReader(self.model, start_column=1)
reader = EncapsulatedSheetReader(
DjangoModelReader(self.model), start_column=1
)
data = reader.to_array()
expected = [["Y", "Z"], [2, 3], [5, 6]]
eq_(list(data), expected)
def test_load_sheet_from_django_model_with_filter_3(self):
reader = DjangoModelReader(self.model, start_column=1, column_limit=1)
reader = EncapsulatedSheetReader(
DjangoModelReader(self.model), start_column=1, column_limit=1
)
data = reader.to_array()
expected = [["Y"], [2], [5]]
eq_(list(data), expected)
@ -422,3 +426,10 @@ def test_django_model_import_adapter():
adapter.column_names = ["a"]
adapter.row_initializer = "abc"
eq_(adapter.row_initializer, "abc")
def read_all(reader):
    """Collect every sheet of ``reader`` into an ordered name -> rows mapping.

    Sheets are visited in the order of ``reader.content_array``. Each value
    is whatever ``reader.read_sheet(index).to_array()`` returns for that
    position. If two sheets share a name, the later value replaces the
    earlier one.
    """
    sheets = OrderedDict()
    for position, entry in enumerate(reader.content_array):
        sheets[entry.name] = reader.read_sheet(position).to_array()
    return sheets

View File

@ -1,9 +1,10 @@
import os
import pyexcel_io.constants as constants
from pyexcel_io import get_data, save_data
from pyexcel_io.utils import _index_filter
from nose.tools import eq_
import pyexcel_io.constants as constants
def test_index_filter():

View File

@ -1,18 +1,20 @@
import os
import sys
import types
from zipfile import BadZipfile
from unittest import TestCase
import pyexcel_io.manager as manager
import pyexcel_io.exceptions as exceptions
from pyexcel_io._compact import StringIO, BytesIO, is_string
from pyexcel_io._compact import OrderedDict
from pyexcel_io import save_data, get_data, iget_data
from pyexcel_io import get_data, iget_data, save_data
from pyexcel_io.io import load_data, get_writer
from nose.tools import raises, eq_
from zipfile import BadZipfile
from pyexcel_io._compact import BytesIO, StringIO, OrderedDict, is_string
from nose.tools import eq_, raises
PY2 = sys.version_info[0] == 2
@raises(IOError)
def test_directory_name_as_file():
get_data("/")
def test_force_file_type():
@ -24,6 +26,20 @@ def test_force_file_type():
eq_(expected, data[test_file])
def test_force_file_type_on_write():
    """A ``.txt`` target written with ``force_file_type="csv"`` reads back as csv."""
    test_file = "force_file_type_on_write.txt"
    try:
        save_data(test_file, {"sheet 1": [[1, 2]]}, force_file_type="csv")
        data = get_data(test_file, force_file_type="csv")
        expected = [[1, 2]]
        eq_(expected, data[test_file])
    finally:
        # Remove the temporary file even when the write, the read or the
        # assertion fails, so a failing run does not leave it behind.
        # The existence check keeps a failed save_data() from being
        # masked by an error from unlink().
        if os.path.exists(test_file):
            os.unlink(test_file)
@raises(IOError)
def test_invalid_file():
load_data("/something/does/not/exist")
@raises(IOError)
def test_no_valid_parameters():
load_data()
@ -49,14 +65,14 @@ def test_wrong_parameter_to_get_writer():
get_writer(1)
@raises(Exception)
def test_wrong_parameter_to_get_writer2():
get_writer(1, file_type="csv")
# @raises(Exception)
# def test_wrong_parameter_to_get_writer2():
# get_writer(1, file_type="csv")
def test_load_ods_data():
msg = "Please install one of these plugins for read data in 'ods': "
msg += "pyexcel-ods,pyexcel-ods3"
msg += "pyexcel-ods,pyexcel-ods3,pyexcel-odsr"
try:
get_data("test.ods")
except exceptions.SupportingPluginAvailableButNotInstalled as e:
@ -66,7 +82,7 @@ def test_load_ods_data():
def test_load_ods_data_from_memory():
io = BytesIO()
msg = "Please install one of these plugins for read data in 'ods': "
msg += "pyexcel-ods,pyexcel-ods3"
msg += "pyexcel-ods,pyexcel-ods3,pyexcel-odsr"
try:
get_data(io, file_type="ods")
except exceptions.SupportingPluginAvailableButNotInstalled as e:
@ -84,7 +100,7 @@ def test_write_xlsx_data_to_memory():
eq_(str(e), msg)
@raises(exceptions.NoSupportingPluginFound)
@raises(IOError)
def test_load_unknown_data():
get_data("test.unknown")
@ -97,11 +113,8 @@ def test_load_unknown_data_from_memory():
@raises(BadZipfile)
def test_load_csvz_data_from_memory():
if not PY2:
io = StringIO()
get_data(io, file_type="csvz")
else:
raise BadZipfile("pass it")
io = StringIO()
get_data(io, file_type="csvz")
@raises(IOError)
@ -109,19 +122,11 @@ def test_write_xlsx_data():
get_data("test.xlsx")
@raises(exceptions.NoSupportingPluginFound)
def test_write_unknown_data():
get_data("test.unknown")
@raises(Exception)
def test_writer_csvz_data_from_memory():
if not PY2:
io = StringIO()
writer = get_writer(io, file_type="csvz")
writer.write({"adb": [[2, 3]]})
else:
raise Exception("pass it")
io = StringIO()
writer = get_writer(io, file_type="csvz")
writer.write({"adb": [[2, 3]]})
@raises(exceptions.NoSupportingPluginFound)
@ -182,6 +187,7 @@ def test_file_handle_as_input():
with open(test_file, "r") as f:
data = get_data(f, "csv")
eq_(data["csv"], [[1, 2, 3]])
os.unlink("file_handle.csv")
def test_file_type_case_insensitivity():
@ -192,6 +198,7 @@ def test_file_type_case_insensitivity():
with open(test_file, "r") as f:
data = get_data(f, "csv")
eq_(data["csv"], [[1, 2, 3]])
os.unlink("file_handle.CSv")
def test_file_handle_as_output():
@ -202,6 +209,7 @@ def test_file_handle_as_output():
with open(test_file, "r") as f:
content = f.read()
eq_(content, "1,2,3\n")
os.unlink("file_handle.csv")
def test_binary_file_content():
@ -247,10 +255,7 @@ def test_conversion_from_bytes_to_text():
def test_is_string():
if PY2:
assert is_string(type(u"a")) is True
else:
assert is_string(type("a")) is True
assert is_string(type("a")) is True
def test_generator_is_obtained():

Some files were not shown because too many files have changed in this diff Show More