diff --git a/.gitignore b/.gitignore index 89f9f44..e8b12f9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,546 @@ -*.pyc -*~ +# moban hashes +.moban.hashes + +# Extra rules from https://github.com/github/gitignore/ +# Python rules +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ .coverage -pyexcel*-info -build -dist -tmp.db -.idea/* +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# VirtualEnv rules +# Virtualenv +# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ +.Python +[Bb]in +[Ii]nclude +[Ll]ib +[Ll]ib64 +[Ll]ocal +[Ss]cripts +pyvenv.cfg +.venv +pip-selfcheck.json + +# Linux rules +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +# Windows rules +# Windows thumbnail cache files +Thumbs.db +Thumbs.db:encryptable +ehthumbs.db +ehthumbs_vista.db + +# Dump file +*.stackdump + +# Folder config file +[Dd]esktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msix +*.msm +*.msp + +# Windows shortcuts +*.lnk + +# macOS rules +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +# Emacs rules +# -*- mode: gitignore; -*- +*~ +\#*\# +/.emacs.desktop +/.emacs.desktop.lock +*.elc +auto-save-list +tramp +.\#* + +# Org-mode +.org-id-locations +*_archive + +# flymake-mode +*_flymake.* + +# eshell files +/eshell/history +/eshell/lastdir + +# elpa packages +/elpa/ + +# reftex files +*.rel + +# AUCTeX auto folder +/auto/ + +# cask packages +.cask/ +dist/ + +# Flycheck +flycheck_*.el + +# server auth directory +/server/ + +# projectiles files +.projectile + +# directory configuration +.dir-locals.el + +# network security +/network-security.data + + +# Vim rules +# Swap +[._]*.s[a-v][a-z] +!*.svg # comment out if you don't need vector files +[._]*.sw[a-p] +[._]s[a-rt-v][a-z] +[._]ss[a-gi-z] +[._]sw[a-p] + +# Session +Session.vim +Sessionx.vim + +# Temporary +.netrwhist +*~ +# Auto-generated tag files +tags +# Persistent undo +[._]*.un~ + +# JetBrains rules +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. +# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +# SublimeText rules +# Cache files for Sublime Text +*.tmlanguage.cache +*.tmPreferences.cache +*.stTheme.cache + +# Workspace files are user-specific +*.sublime-workspace + +# Project files should be checked into the repository, unless a significant +# proportion of contributors will probably not be using Sublime Text +# *.sublime-project + +# SFTP configuration file +sftp-config.json +sftp-config-alt*.json + +# Package control specific files +Package Control.last-run +Package Control.ca-list +Package Control.ca-bundle +Package Control.system-ca-bundle +Package Control.cache/ +Package Control.ca-certs/ +Package Control.merged-ca-bundle +Package Control.user-ca-bundle +oscrypto-ca-bundle.crt +bh_unicode_properties.cache + +# Sublime-github package stores a github token in this file +# https://packagecontrol.io/packages/sublime-github +GitHub.sublime-settings + +# KDevelop4 rules +*.kdev4 +.kdev4/ + +# Kate rules +# Swap Files # +.*.kate-swp +.swp.* + +# TextMate rules +*.tmproj +*.tmproject +tmtags + +# VisualStudioCode rules +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +*.code-workspace + +# Local History for Visual Studio Code +.history/ + +# Xcode rules +# Xcode +# +# gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore + +## User settings +xcuserdata/ + +## compatibility with Xcode 8 and earlier (ignoring not required starting Xcode 9) +*.xcscmblueprint +*.xccheckout + +## compatibility with Xcode 3 and earlier (ignoring not required starting Xcode 4) +build/ +DerivedData/ +*.moved-aside +*.pbxuser +!default.pbxuser +*.mode1v3 +!default.mode1v3 +*.mode2v3 +!default.mode2v3 +*.perspectivev3 +!default.perspectivev3 + +## Gcc Patch +/*.gcno + +# Eclipse rules +.metadata +bin/ +tmp/ +*.tmp +*.bak +*.swp +*~.nib +local.properties +.settings/ +.loadpath +.recommenders + +# External tool builders +.externalToolBuilders/ + +# Locally stored "Eclipse launch configurations" +*.launch + +# PyDev specific (Python IDE for Eclipse) +*.pydevproject + +# CDT-specific (C/C++ Development Tooling) +.cproject + +# CDT- autotools +.autotools + +# Java annotation processor (APT) +.factorypath + +# PDT-specific (PHP Development Tools) +.buildpath + +# sbteclipse plugin +.target + +# Tern plugin +.tern-project + +# TeXlipse plugin +.texlipse + +# STS (Spring Tool Suite) +.springBeans + +# Code Recommenders +.recommenders/ + +# Annotation Processing +.apt_generated/ +.apt_generated_test/ + +# Scala IDE specific (Scala & Java development for Eclipse) +.cache-main +.scala_dependencies +.worksheet + +# Uncomment this line if you wish to ignore the project description file. +# Typically, this file would be tracked if it contains build/dependency configurations: +#.project + +# TortoiseGit rules +# Project-level settings +/.tgitconfig + +# Tags rules +# Ignore tags created by etags, ctags, gtags (GNU global) and cscope +TAGS +.TAGS +!TAGS/ +tags +.tags +!tags/ +gtags.files +GTAGS +GRTAGS +GPATH +GSYMS +cscope.files +cscope.out +cscope.in.out +cscope.po.out + + +# remove moban hash dictionary +.moban.hashes diff --git a/.moban.yml b/.moban.yml index e6307f0..98dbc6f 100644 --- a/.moban.yml +++ b/.moban.yml @@ -7,3 +7,4 @@ targets: - .travis.yml: custom_travis.yml.jj2 - README.rst: io_readme.rst.jj2 - "docs/source/index.rst": "docs/source/index.rst" + - .gitignore: gitignore.jj2 diff --git a/CONTRIBUTORS.rst b/CONTRIBUTORS.rst index 8f171f7..6d0fa16 100644 --- a/CONTRIBUTORS.rst +++ b/CONTRIBUTORS.rst @@ -4,8 +4,8 @@ In alphabetical order: -* `Craig Anderson `_ -* `John Vandenberg `_ -* `Stephen J. Fuhry `_ -* `Stephen Rauch `_ -* `Víctor Antonio Hernández Monroy `_ +* `Craig Anderson `_ +* `John Vandenberg `_ +* `Stephen J. Fuhry `_ +* `Stephen Rauch `_ +* `Víctor Antonio Hernández Monroy `_ diff --git a/MANIFEST.in b/MANIFEST.in index 6c71554..cadad1b 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,11 +1,8 @@ include README.rst include LICENSE include CHANGELOG.rst +include CONTRIBUTORS.rst recursive-include tests * -<<<<<<< HEAD -======= recursive-include docs * ->>>>>>> master -include docs/source/* include Makefile include test.sh diff --git a/docs/source/extendedcsv.rst b/docs/source/extendedcsv.rst index 64ba764..79318fd 100644 --- a/docs/source/extendedcsv.rst +++ b/docs/source/extendedcsv.rst @@ -74,6 +74,8 @@ Continue from previous example:: :hide: >>> import os + >>> if os.path.exists("your_file.csv"): + ... os.unlink("your_file.csv") >>> os.unlink("your_file__Sheet 1__0.csv") >>> os.unlink("your_file__Sheet 2__1.csv") diff --git a/docs/source/pagination.rst b/docs/source/pagination.rst index 51eeafe..cb9181a 100644 --- a/docs/source/pagination.rst +++ b/docs/source/pagination.rst @@ -60,3 +60,9 @@ Obvious, you could do both at the same time: The pagination support is available across all pyexcel-io plugins. +.. testcode:: + :hide: + + >>> import os + >>> if os.path.exists("your_file.csv"): + ... os.unlink("your_file.csv") diff --git a/docs/source/plaincsv.rst b/docs/source/plaincsv.rst index fe8f96c..c6c3490 100644 --- a/docs/source/plaincsv.rst +++ b/docs/source/plaincsv.rst @@ -172,4 +172,5 @@ When you read it back you will have to specify encoding too. >>> import os >>> os.unlink("your_file.csv") + >>> os.unlink("test-utf16-encoding.csv") >>> os.unlink(test_file) diff --git a/docs/source/pyinstaller.rst b/docs/source/pyinstaller.rst index d743033..d92179a 100644 --- a/docs/source/pyinstaller.rst +++ b/docs/source/pyinstaller.rst @@ -9,14 +9,16 @@ Built-in plugins for pyexcel-io In order to package every built-in plugins of pyexcel-io, you need to specify:: - --hidden-import pyexcel_io.readers.csvr + --hidden-import pyexcel_io.readers.csv_in_file + --hidden-import pyexcel_io.readers.csv_in_memory + --hidden-import pyexcel_io.readers.csv_content --hidden-import pyexcel_io.readers.csvz --hidden-import pyexcel_io.readers.tsv --hidden-import pyexcel_io.readers.tsvz - --hidden-import pyexcel_io.writers.csv_file_writer - --hidden-import pyexcel_io.writers.csv_memory_writer - --hidden-import pyexcel_io.writers.tsv_file_writer - --hidden-import pyexcel_io.writers.tsv_memory_writer + --hidden-import pyexcel_io.writers.csv_in_file + --hidden-import pyexcel_io.writers.csv_in_memory + --hidden-import pyexcel_io.writers.tsv_in_file + --hidden-import pyexcel_io.writers.tsv_in_memory --hidden-import pyexcel_io.writers.csvz_writer --hidden-import pyexcel_io.writers.tsvz_writer --hidden-import pyexcel_io.database.importers.django diff --git a/docs/source/renderer.rst b/docs/source/renderer.rst index a10631e..24a6f0a 100644 --- a/docs/source/renderer.rst +++ b/docs/source/renderer.rst @@ -46,3 +46,10 @@ And you may want use row_renderer to customize it to string: >>> data = get_data("your_file.csv", row_renderer=my_renderer) >>> data['your_file.csv'] [['1', '21', '31'], ['2', '22', '32'], ['3', '23', '33']] + +.. testcode:: + :hide: + + >>> import os + >>> if os.path.exists("your_file.csv"): + ... os.unlink("your_file.csv") diff --git a/pyexcel_io/_compact.py b/pyexcel_io/_compact.py index c179ce7..37a5694 100644 --- a/pyexcel_io/_compact.py +++ b/pyexcel_io/_compact.py @@ -16,17 +16,7 @@ import sys import types import logging - -PY2 = sys.version_info[0] == 2 -PY3_ABOVE = sys.version_info[0] >= 3 -PY26 = PY2 and sys.version_info[1] < 7 -PY27 = PY2 and sys.version_info[1] == 7 -PY27_ABOVE = PY27 or PY3_ABOVE - -if PY26: - from ordereddict import OrderedDict -else: - from collections import OrderedDict +from collections import OrderedDict try: from logging import NullHandler @@ -37,24 +27,11 @@ except ImportError: pass -if PY2: - from cStringIO import StringIO - from cStringIO import StringIO as BytesIO +from io import BytesIO, StringIO - text_type = unicode - irange = xrange - - class Iterator(object): - def next(self): - return type(self).__next__(self) - - -else: - from io import BytesIO, StringIO - - text_type = str - Iterator = object - irange = range +text_type = str +Iterator = object +irange = range def isstream(instance): diff --git a/pyexcel_io/plugins.py b/pyexcel_io/plugins.py index 742a074..89b0e6e 100644 --- a/pyexcel_io/plugins.py +++ b/pyexcel_io/plugins.py @@ -176,8 +176,7 @@ class NewIOManager(IOManager): ): __file_type = file_type.lower() plugin = self.load_me_now(f"{location}-{__file_type}", library=library) - handler = plugin() - return handler + return plugin def raise_exception(self, file_type): file_type = file_type.split("-")[1] @@ -224,14 +223,14 @@ def _do_additional_registration_for_new_plugins(plugin_info): ) -class FakeReaders: +class AllReaders: def get_all_formats(self): return OLD_READERS.get_all_formats().union( NEW_READERS.get_all_formats() ) -class FakeWriters: +class AllWriters: def get_all_formats(self): return OLD_WRITERS.get_all_formats().union( NEW_WRITERS.get_all_formats() @@ -242,8 +241,8 @@ OLD_READERS = IOManager(READER_PLUGIN, ioutils.AVAILABLE_READERS) OLD_WRITERS = IOManager(WRITER_PLUGIN, ioutils.AVAILABLE_WRITERS) NEW_WRITERS = NewIOManager(NEW_WRITER_PLUGIN, ioutils.AVAILABLE_WRITERS) NEW_READERS = NewIOManager(NEW_READER_PLUGIN, ioutils.AVAILABLE_READERS) -READERS = FakeReaders() -WRITERS = FakeWriters() +READERS = AllReaders() +WRITERS = AllWriters() def load_plugins(plugin_name_patterns, path, black_list, white_list): diff --git a/pyexcel_io/reader.py b/pyexcel_io/reader.py index 1103938..0d9a9aa 100644 --- a/pyexcel_io/reader.py +++ b/pyexcel_io/reader.py @@ -33,19 +33,21 @@ class Reader(object): self.keywords = None def open(self, file_name, **keywords): - self.reader = NEW_READERS.get_a_plugin( + reader_class = NEW_READERS.get_a_plugin( self.file_type, location="file", library=self.library ) self.keywords, native_sheet_keywords = clean_keywords(keywords) - return self.reader.open(file_name, **native_sheet_keywords) + self.reader = reader_class(file_name, **native_sheet_keywords) + return self.reader def open_content(self, file_content, **keywords): self.keywords, native_sheet_keywords = clean_keywords(keywords) try: - self.reader = NEW_READERS.get_a_plugin( + reader_class = NEW_READERS.get_a_plugin( self.file_type, location="content", library=self.library ) - return self.reader.open(file_content, **native_sheet_keywords) + self.reader = reader_class(file_content, **native_sheet_keywords) + return self.reader except ( exceptions.NoSupportingPluginFound, exceptions.SupportingPluginAvailableButNotInstalled, @@ -57,10 +59,11 @@ class Reader(object): def open_stream(self, file_stream, **keywords): self.keywords, native_sheet_keywords = clean_keywords(keywords) - self.reader = NEW_READERS.get_a_plugin( + reader_class = NEW_READERS.get_a_plugin( self.file_type, location="memory", library=self.library ) - return self.reader.open(file_stream, **native_sheet_keywords) + self.reader = reader_class(file_stream, **native_sheet_keywords) + return self.reader def read_sheet_by_name(self, sheet_name): """ diff --git a/pyexcel_io/readers/__init__.py b/pyexcel_io/readers/__init__.py index 1d00577..9cb3cb2 100644 --- a/pyexcel_io/readers/__init__.py +++ b/pyexcel_io/readers/__init__.py @@ -10,17 +10,17 @@ from pyexcel_io.plugins import NewIOPluginInfoChain NewIOPluginInfoChain(__name__).add_a_reader( - relative_plugin_class_path="csv_file_reader.FileReader", + relative_plugin_class_path="csv_in_file.FileReader", location="file", file_types=["csv"], stream_type="text", ).add_a_reader( - relative_plugin_class_path="csv_content_reader.ContentReader", + relative_plugin_class_path="csv_content.ContentReader", location="content", file_types=["csv"], stream_type="text", ).add_a_reader( - relative_plugin_class_path="csv_memory_reader.MemoryReader", + relative_plugin_class_path="csv_in_memory.MemoryReader", location="memory", file_types=["csv"], stream_type="text", diff --git a/pyexcel_io/readers/csv_content.py b/pyexcel_io/readers/csv_content.py new file mode 100644 index 0000000..96636cf --- /dev/null +++ b/pyexcel_io/readers/csv_content.py @@ -0,0 +1,30 @@ +import mmap + +import pyexcel_io.constants as constants +from pyexcel_io.book import _convert_content_to_stream +from pyexcel_io.readers.csv_sheet import CSVMemoryMapIterator +from pyexcel_io.readers.csv_in_memory import MemoryReader + + +class ContentReader(MemoryReader): + file_type = constants.FILE_FORMAT_CSV + + def __init__(self, file_content, **keywords): + file_stream = ContentReader.convert_content_to_stream( + file_content, self.file_type, **keywords + ) + super().__init__(file_stream, **keywords) + + @staticmethod + def convert_content_to_stream(file_content, file_type, **keywords): + encoding = keywords.get("encoding", "utf-8") + if isinstance(file_content, mmap.mmap): + # load from mmap + file_stream = CSVMemoryMapIterator(file_content, encoding) + else: + if isinstance(file_content, bytes): + file_content = file_content.decode(encoding) + + file_stream = _convert_content_to_stream(file_content, file_type) + + return file_stream diff --git a/pyexcel_io/readers/csv_content_reader.py b/pyexcel_io/readers/csv_content_reader.py deleted file mode 100644 index 52f29dc..0000000 --- a/pyexcel_io/readers/csv_content_reader.py +++ /dev/null @@ -1,21 +0,0 @@ -import mmap - -from pyexcel_io.book import _convert_content_to_stream -from pyexcel_io.readers.csvr import CSVMemoryMapIterator -from pyexcel_io.readers.csv_memory_reader import MemoryReader - - -class ContentReader(MemoryReader): - def open(self, file_content, **keywords): - encoding = keywords.get("encoding", "utf-8") - if isinstance(file_content, mmap.mmap): - # load from mmap - file_stream = CSVMemoryMapIterator(file_content, encoding) - else: - if isinstance(file_content, bytes): - file_content = file_content.decode(encoding) - - file_stream = _convert_content_to_stream( - file_content, self.file_type - ) - super(ContentReader, self).open(file_stream, **keywords) diff --git a/pyexcel_io/readers/csv_file_reader.py b/pyexcel_io/readers/csv_in_file.py similarity index 91% rename from pyexcel_io/readers/csv_file_reader.py rename to pyexcel_io/readers/csv_in_file.py index 5f196f0..fa68618 100644 --- a/pyexcel_io/readers/csv_file_reader.py +++ b/pyexcel_io/readers/csv_in_file.py @@ -4,23 +4,18 @@ import glob from pyexcel_io import constants from pyexcel_io.sheet import NamedContent -from pyexcel_io.readers.csvr import CSVFileReader +from pyexcel_io.readers.csv_sheet import CSVFileReader DEFAULT_NEWLINE = "\r\n" class FileReader(object): - def __init__(self): - self.handles = [] - - def set_type(self, _): - pass - - def open(self, file_name, **keywords): + def __init__(self, file_name, **keywords): """Load content from a file :params str filename: an accessible file path :returns: a book """ + self.handles = [] self.keywords = keywords self.__line_terminator = keywords.get( constants.KEYWORD_LINE_TERMINATOR, DEFAULT_NEWLINE diff --git a/pyexcel_io/readers/csv_memory_reader.py b/pyexcel_io/readers/csv_in_memory.py similarity index 88% rename from pyexcel_io/readers/csv_memory_reader.py rename to pyexcel_io/readers/csv_in_memory.py index 830d0f2..aa54693 100644 --- a/pyexcel_io/readers/csv_memory_reader.py +++ b/pyexcel_io/readers/csv_in_memory.py @@ -3,24 +3,20 @@ import re import pyexcel_io._compact as compact from pyexcel_io import constants from pyexcel_io.sheet import NamedContent -from pyexcel_io.readers.csvr import CSVinMemoryReader +from pyexcel_io.readers.csv_sheet import CSVinMemoryReader DEFAULT_SHEET_SEPARATOR_FORMATTER = f"---{constants.DEFAULT_NAME}---%s" class MemoryReader(object): - def __init__(self): - self.handles = [] - self.file_type = constants.FILE_FORMAT_CSV + file_type = constants.FILE_FORMAT_CSV - def set_type(self, _): - pass - - def open(self, file_stream, multiple_sheets=False, **keywords): + def __init__(self, file_stream, multiple_sheets=False, **keywords): """Load content from memory :params stream file_content: the actual file content in memory :returns: a book """ + self.handles = [] self.keywords = keywords self.__load_from_memory_flag = True self.__line_terminator = keywords.get( diff --git a/pyexcel_io/readers/csvr.py b/pyexcel_io/readers/csv_sheet.py similarity index 100% rename from pyexcel_io/readers/csvr.py rename to pyexcel_io/readers/csv_sheet.py diff --git a/pyexcel_io/readers/csvz.py b/pyexcel_io/readers/csvz.py index b1ca68e..a783071 100644 --- a/pyexcel_io/readers/csvz.py +++ b/pyexcel_io/readers/csvz.py @@ -12,18 +12,12 @@ import zipfile import chardet from pyexcel_io.sheet import NamedContent from pyexcel_io._compact import StringIO -from pyexcel_io.readers.csvr import CSVinMemoryReader +from pyexcel_io.readers.csv_sheet import CSVinMemoryReader class FileReader(object): - def __init__(self): + def __init__(self, file_alike_object, **keywords): self.content_array = [] - self.keywords = None - - def set_type(self, _): - pass - - def open(self, file_alike_object, **keywords): try: self.zipfile = zipfile.ZipFile(file_alike_object, "r") sheets = [ diff --git a/pyexcel_io/readers/tsv.py b/pyexcel_io/readers/tsv.py index d505994..79edea9 100644 --- a/pyexcel_io/readers/tsv.py +++ b/pyexcel_io/readers/tsv.py @@ -8,32 +8,31 @@ :license: New BSD License, see LICENSE for more details """ import pyexcel_io.constants as constants -from pyexcel_io.readers.csv_file_reader import FileReader -from pyexcel_io.readers.csv_memory_reader import MemoryReader -from pyexcel_io.readers.csv_content_reader import ContentReader +from pyexcel_io.readers.csv_content import ContentReader +from pyexcel_io.readers.csv_in_file import FileReader +from pyexcel_io.readers.csv_in_memory import MemoryReader class TSVFileReader(FileReader): - def open(self, file_name, **keywords): - keywords["dialect"] = constants.KEYWORD_TSV_DIALECT - super(TSVFileReader, self).open(file_name, **keywords) + def __init__(self, file_name, **keywords): + super().__init__( + file_name, dialect=constants.KEYWORD_TSV_DIALECT, **keywords + ) class TSVMemoryReader(MemoryReader): - def __init__(self): - self.handles = [] - self.file_type = constants.FILE_FORMAT_TSV + file_type = constants.FILE_FORMAT_TSV - def open(self, file_stream, **keywords): - keywords["dialect"] = constants.KEYWORD_TSV_DIALECT - super(TSVMemoryReader, self).open(file_stream, **keywords) + def __init__(self, file_stream, **keywords): + super().__init__( + file_stream, dialect=constants.KEYWORD_TSV_DIALECT, **keywords + ) class TSVContentReader(ContentReader): - def __init__(self): - self.handles = [] - self.file_type = constants.FILE_FORMAT_TSV + file_type = constants.FILE_FORMAT_TSV - def open(self, file_content, **keywords): - keywords["dialect"] = constants.KEYWORD_TSV_DIALECT - super(TSVContentReader, self).open(file_content, **keywords) + def __init__(self, file_content, **keywords): + super().__init__( + file_content, dialect=constants.KEYWORD_TSV_DIALECT, **keywords + ) diff --git a/pyexcel_io/readers/tsvz.py b/pyexcel_io/readers/tsvz.py index 1be5cba..bd6d904 100644 --- a/pyexcel_io/readers/tsvz.py +++ b/pyexcel_io/readers/tsvz.py @@ -18,6 +18,5 @@ class TSVZipFileReader(FileReader): it supports single tsv file and mulitple tsv files """ - def open(self, file_name, **keywords): - keywords["dialect"] = KEYWORD_TSV_DIALECT - super(TSVZipFileReader, self).open(file_name, **keywords) + def __init__(self, file_name, **keywords): + super().__init__(file_name, dialect=KEYWORD_TSV_DIALECT, **keywords) diff --git a/pyexcel_io/writer.py b/pyexcel_io/writer.py index 0e3e468..493f75b 100644 --- a/pyexcel_io/writer.py +++ b/pyexcel_io/writer.py @@ -11,24 +11,24 @@ class Writer(object): self.keyboards = None def open(self, file_name, **keywords): - self.writer = NEW_WRITERS.get_a_plugin( + writer_class = NEW_WRITERS.get_a_plugin( self.file_type, library=self.library, location="file" ) - self.writer.open(file_name, **keywords) + self.writer = writer_class(file_name, **keywords) def open_content(self, file_stream, **keywords): if not isstream(file_stream): raise IOError(MESSAGE_ERROR_03) - self.writer = NEW_WRITERS.get_a_plugin( + writer_class = NEW_WRITERS.get_a_plugin( self.file_type, library=self.library, location="content" ) - self.writer.open(file_stream, **keywords) + self.writer = writer_class(file_stream, **keywords) def open_stream(self, file_stream, **keywords): - self.writer = NEW_WRITERS.get_a_plugin( + writer_class = NEW_WRITERS.get_a_plugin( self.file_type, library=self.library, location="memory" ) - self.writer.open(file_stream, **keywords) + self.writer = writer_class(file_stream, **keywords) def write(self, incoming_dict): for sheet_name in incoming_dict: diff --git a/pyexcel_io/writers/csv_in_file.py b/pyexcel_io/writers/csv_in_file.py index d4ddce0..f093cdf 100644 --- a/pyexcel_io/writers/csv_in_file.py +++ b/pyexcel_io/writers/csv_in_file.py @@ -2,13 +2,11 @@ from pyexcel_io.writers.csv_sheet import CSVFileWriter class CsvFileWriter: - def __init__(self): - self.__index = 0 - self.writer = None - - def open(self, file_alike_object, **keywords): + def __init__(self, file_alike_object, **keywords): self._file_alike_object = file_alike_object self._keywords = keywords + self.__index = 0 + self.writer = None def create_sheet(self, name): self.writer = CSVFileWriter( diff --git a/pyexcel_io/writers/csv_in_memory.py b/pyexcel_io/writers/csv_in_memory.py index 3724439..a3ad8b2 100644 --- a/pyexcel_io/writers/csv_in_memory.py +++ b/pyexcel_io/writers/csv_in_memory.py @@ -2,12 +2,10 @@ from pyexcel_io.writers.csv_sheet import CSVMemoryWriter class CsvMemoryWriter: - def __init__(self): - self.__index = 0 - - def open(self, file_alike_object, **keywords): + def __init__(self, file_alike_object, **keywords): self._file_alike_object = file_alike_object self._keywords = keywords + self.__index = 0 def create_sheet(self, name): writer_class = CSVMemoryWriter diff --git a/pyexcel_io/writers/csvz_writer.py b/pyexcel_io/writers/csvz_writer.py index da390fd..19bffff 100644 --- a/pyexcel_io/writers/csvz_writer.py +++ b/pyexcel_io/writers/csvz_writer.py @@ -13,12 +13,8 @@ class CsvZipWriter(object): any other unzip software. """ - def __init__(self): - self.zipfile = None - self._keywords = None + def __init__(self, file_name, **keywords): self._file_type = FILE_FORMAT_CSVZ - - def open(self, file_name, **keywords): self.zipfile = zipfile.ZipFile(file_name, "w", zipfile.ZIP_DEFLATED) self._keywords = keywords diff --git a/pyexcel_io/writers/tsv_in_file.py b/pyexcel_io/writers/tsv_in_file.py index 03668ea..853285d 100644 --- a/pyexcel_io/writers/tsv_in_file.py +++ b/pyexcel_io/writers/tsv_in_file.py @@ -3,7 +3,7 @@ from pyexcel_io.writers.csv_in_file import CsvFileWriter class TsvFileWriter(CsvFileWriter): - def open(self, file_alike_object, **keywords): - super().open( + def __init__(self, file_alike_object, **keywords): + super().__init__( file_alike_object, dialect=KEYWORD_TSV_DIALECT, **keywords ) diff --git a/pyexcel_io/writers/tsv_in_memory.py b/pyexcel_io/writers/tsv_in_memory.py index e505c8b..0e1d305 100644 --- a/pyexcel_io/writers/tsv_in_memory.py +++ b/pyexcel_io/writers/tsv_in_memory.py @@ -3,7 +3,7 @@ from pyexcel_io.writers.csv_in_memory import CsvMemoryWriter class TsvMemoryWriter(CsvMemoryWriter): - def open(self, file_alike_object, **keywords): - super().open( + def __init__(self, file_alike_object, **keywords): + super().__init__( file_alike_object, dialect=KEYWORD_TSV_DIALECT, **keywords ) diff --git a/pyexcel_io/writers/tsvz_writer.py b/pyexcel_io/writers/tsvz_writer.py index 09b83fe..7eb438d 100644 --- a/pyexcel_io/writers/tsvz_writer.py +++ b/pyexcel_io/writers/tsvz_writer.py @@ -3,9 +3,6 @@ from pyexcel_io.writers.csvz_writer import CsvZipWriter class TsvZipWriter(CsvZipWriter): - def __init__(self): - super().__init__() + def __init__(self, file_name, **keywords): + super().__init__(file_name, dialect=KEYWORD_TSV_DIALECT, **keywords) self._file_type = FILE_FORMAT_TSVZ - - def open(self, file_name, **keywords): - super().open(file_name, dialect=KEYWORD_TSV_DIALECT, **keywords) diff --git a/tests/test_book.py b/tests/test_book.py index 14755a2..c14a380 100644 --- a/tests/test_book.py +++ b/tests/test_book.py @@ -4,7 +4,7 @@ from pyexcel_io.book import ( RWInterface, _convert_content_to_stream, ) -from pyexcel_io._compact import PY2, BytesIO, StringIO +from pyexcel_io._compact import BytesIO, StringIO from nose import SkipTest from nose.tools import raises @@ -41,18 +41,12 @@ def test_book_writer(): def test_convert_to_bytes_stream(): - if PY2: - raise SkipTest("No need test in python 2") - else: - file_content = b"test" - stream = _convert_content_to_stream(file_content, "csv") - assert isinstance(stream, StringIO) + file_content = b"test" + stream = _convert_content_to_stream(file_content, "csv") + assert isinstance(stream, StringIO) def test_convert_to_string_stream(): - if PY2: - raise SkipTest("No need test in python 2") - else: - file_content = "test" - stream = _convert_content_to_stream(file_content, "csvz") - assert isinstance(stream, BytesIO) + file_content = "test" + stream = _convert_content_to_stream(file_content, "csvz") + assert isinstance(stream, BytesIO) diff --git a/tests/test_csv_book.py b/tests/test_csv_book.py index 75175f8..381ca69 100644 --- a/tests/test_csv_book.py +++ b/tests/test_csv_book.py @@ -5,8 +5,8 @@ from unittest import TestCase import pyexcel_io.manager as manager from pyexcel_io.sheet import NamedContent from pyexcel_io.reader import EncapsulatedSheetReader -from pyexcel_io._compact import PY2, BytesIO, StringIO -from pyexcel_io.readers.csvr import ( +from pyexcel_io._compact import BytesIO, StringIO +from pyexcel_io.readers.csv_sheet import ( CSVFileReader, CSVSheetReader, CSVinMemoryReader, @@ -128,8 +128,6 @@ def test_utf16_decoding(): CSVFileReader(NamedContent("csv", test_file), encoding="utf-16") ) content = list(reader.to_array()) - if PY2: - content[0] = [s.encode("utf-8") for s in content[0]] expected = [["Äkkilähdöt", "Matkakirjoituksia", "Matkatoimistot"]] eq_(content, expected) @@ -144,8 +142,6 @@ def test_utf16_encoding(): writer.close() with open(test_file, "rb") as f: actual = f.read().decode("utf-16") - if PY2: - actual = actual.encode("utf-8") eq_(actual, "Äkkilähdöt,Matkakirjoituksia,Matkatoimistot\n") os.unlink(test_file) @@ -157,8 +153,6 @@ def test_utf16_memory_decoding(): CSVinMemoryReader(NamedContent("csv", test_content), encoding="utf-16") ) content = list(reader.to_array()) - if PY2: - content[0] = [s.encode("utf-8") for s in content[0]] expected = [["Äkkilähdöt", "Matkakirjoituksia", "Matkatoimistot"]] eq_(content, expected) @@ -175,6 +169,4 @@ def test_utf16_memory_encoding(): ) writer.write_array(content) actual = io.getvalue() - if PY2: - actual = actual.decode("utf-16") eq_(actual, u"Äkkilähdöt,Matkakirjoituksia,Matkatoimistot\n") diff --git a/tests/test_io.py b/tests/test_io.py index 1870c74..0646244 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -196,6 +196,7 @@ def test_file_handle_as_input(): with open(test_file, "r") as f: data = get_data(f, "csv") eq_(data["csv"], [[1, 2, 3]]) + os.unlink("file_handle.csv") def test_file_type_case_insensitivity(): @@ -206,6 +207,7 @@ def test_file_type_case_insensitivity(): with open(test_file, "r") as f: data = get_data(f, "csv") eq_(data["csv"], [[1, 2, 3]]) + os.unlink("file_handle.CSv") def test_file_handle_as_output(): @@ -216,6 +218,7 @@ def test_file_handle_as_output(): with open(test_file, "r") as f: content = f.read() eq_(content, "1,2,3\n") + os.unlink("file_handle.csv") def test_binary_file_content(): diff --git a/tests/test_issues.py b/tests/test_issues.py index 09582e0..b7b9dd5 100644 --- a/tests/test_issues.py +++ b/tests/test_issues.py @@ -5,7 +5,6 @@ import os import pyexcel as p from pyexcel_io import get_data, save_data -from pyexcel_io._compact import PY26 from nose import SkipTest from nose.tools import eq_ @@ -54,19 +53,14 @@ def test_issue_23(): def test_issue_33_34(): - if PY26: - pass - else: - import mmap + import mmap - test_file = get_fixture("issue20.csv") - with open(test_file, "r+b") as f: - memory_mapped_file = mmap.mmap( - f.fileno(), 0, access=mmap.ACCESS_READ - ) - data = get_data(memory_mapped_file, file_type="csv") - expected = [[u"to", u"infinity", u"and", u"beyond"]] - eq_(data["csv"], expected) + test_file = get_fixture("issue20.csv") + with open(test_file, "r+b") as f: + memory_mapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) + data = get_data(memory_mapped_file, file_type="csv") + expected = [[u"to", u"infinity", u"and", u"beyond"]] + eq_(data["csv"], expected) def test_issue_30_utf8_BOM_header(): @@ -82,52 +76,31 @@ def test_issue_30_utf8_BOM_header(): def test_issue_33_34_utf32_encoded_file(): - if PY26: - pass - else: - check_mmap_encoding("utf-32") + check_mmap_encoding("utf-32") def test_issue_33_34_utf32be_encoded_file(): - if PY26: - pass - else: - check_mmap_encoding("utf-32-be") + check_mmap_encoding("utf-32-be") def test_issue_33_34_utf32le_encoded_file(): - if PY26: - pass - else: - check_mmap_encoding("utf-32-le") + check_mmap_encoding("utf-32-le") def test_issue_33_34_utf16_encoded_file(): - if PY26: - pass - else: - check_mmap_encoding("utf-16") + check_mmap_encoding("utf-16") def test_issue_33_34_utf16be_encoded_file(): - if PY26: - pass - else: - check_mmap_encoding("utf-16-be") + check_mmap_encoding("utf-16-be") def test_issue_33_34_utf16le_encoded_file(): - if PY26: - pass - else: - check_mmap_encoding("utf-16-le") + check_mmap_encoding("utf-16-le") def test_issue_33_34_utf8_encoded_file(): - if PY26: - pass - else: - check_mmap_encoding("utf-8") + check_mmap_encoding("utf-8") def check_mmap_encoding(encoding): diff --git a/tests/test_service.py b/tests/test_service.py index 5971708..8d0af9a 100644 --- a/tests/test_service.py +++ b/tests/test_service.py @@ -1,5 +1,4 @@ from pyexcel_io.service import ( - ODS_WRITE_FORMAT_COVERSION, date_value, time_value, ods_float_value, @@ -7,10 +6,8 @@ from pyexcel_io.service import ( detect_int_value, detect_float_value, ) -from pyexcel_io._compact import PY2 from pyexcel_io.exceptions import IntegerAccuracyLossError -from nose import SkipTest from nose.tools import eq_, raises @@ -101,34 +98,11 @@ def test_detect_float_value_on_custom_nan_text2(): eq_(str(result), "nan") -def test_ods_write_format_conversion(): - if PY2: - expected = ODS_WRITE_FORMAT_COVERSION[long] # noqa: F821 - eq_("long", expected) - else: - raise SkipTest() - - @raises(IntegerAccuracyLossError) def test_big_int_value(): ods_float_value(1000000000000000) -def test_max_value_on_python_2(): - if PY2: - ods_float_value(long(999999999999999)) - else: - raise SkipTest("No long in python 3") - - -@raises(IntegerAccuracyLossError) -def test_really_long_value_on_python2(): - if PY2: - ods_float_value(long(999999999999999 + 1)) - else: - raise SkipTest("No long in python 3") - - @raises(IntegerAccuracyLossError) def test_throw_exception(): throw_exception(1000000000000000)