debian-django-import-export/import_export/formats/base_formats.py

245 lines
6.4 KiB
Python

from __future__ import unicode_literals
from django.utils.six import moves
import sys
import warnings
import tablib
# Detect whether 'xls' files can be imported: first try the xlrd module
# exposed by ``tablib.compat``, then fall back to a standalone xlrd install.
# XLS_IMPORT records the outcome for the XLS format class.
try:
    from tablib.compat import xlrd
    XLS_IMPORT = True
except ImportError:
    try:
        import xlrd  # NOQA
        XLS_IMPORT = True
    except ImportError:
        # The parentheses matter: without them the second literal is a
        # separate no-op expression statement and the warning is truncated.
        xls_warning = (
            "Installed `tablib` library does not include "
            "import support for 'xls' format and xlrd module is not found."
        )
        warnings.warn(xls_warning, ImportWarning)
        XLS_IMPORT = False
# Detect whether 'xlsx' files can be imported: prefer a standalone openpyxl
# install, then fall back to the module exposed by ``tablib.compat``.
# XLSX_IMPORT records the outcome for the XLSX format class.
try:
    import openpyxl
    XLSX_IMPORT = True
except ImportError:
    try:
        from tablib.compat import openpyxl
        # The fallback module only counts if it can actually load workbooks.
        XLSX_IMPORT = hasattr(openpyxl, 'load_workbook')
    except ImportError:
        # The parentheses matter: without them the second literal is a
        # separate no-op expression statement and the warning is truncated.
        xlsx_warning = (
            "Installed `tablib` library does not include "
            "import support for 'xlsx' format and openpyxl module is not "
            "found."
        )
        warnings.warn(xlsx_warning, ImportWarning)
        XLSX_IMPORT = False
try:
from importlib import import_module
except ImportError:
from django.utils.importlib import import_module
class Format(object):
    """
    Base description of an import/export file format.

    The defaults describe a binary format that can neither be imported nor
    exported; subclasses override the methods that apply to them.
    """

    def get_title(self):
        """
        Returns the title of this format (by default the format class).
        """
        return type(self)

    def create_dataset(self, in_stream):
        """
        Builds a dataset from the given input. Must be overridden.
        """
        raise NotImplementedError()

    def export_data(self, dataset, **kwargs):
        """
        Serializes the given dataset into this format. Must be overridden.
        """
        raise NotImplementedError()

    def is_binary(self):
        """
        Tells whether files of this format hold binary content.
        """
        return True

    def get_read_mode(self):
        """
        Returns the mode string to use when opening files of this format.
        """
        return 'rb'

    def get_extension(self):
        """
        Returns the file extension for this format (empty by default).
        """
        return ""

    def get_content_type(self):
        """
        Returns the media type to advertise for this format.

        For content types see
        https://www.iana.org/assignments/media-types/media-types.xhtml
        """
        return 'application/octet-stream'

    def can_import(self):
        """
        Tells whether data can be imported from this format.
        """
        return False

    def can_export(self):
        """
        Tells whether data can be exported to this format.
        """
        return False
class TablibFormat(Format):
    """
    Format backed by a tablib format module.

    Subclasses set TABLIB_MODULE to the dotted path of the tablib module and
    CONTENT_TYPE to the media type of the produced files.
    """
    TABLIB_MODULE = None
    CONTENT_TYPE = 'application/octet-stream'

    def get_format(self):
        """
        Imports and returns the tablib module named by TABLIB_MODULE.
        """
        return import_module(self.TABLIB_MODULE)

    def get_title(self):
        """
        Returns the title declared by the tablib format module.
        """
        tablib_format = self.get_format()
        return tablib_format.title

    def create_dataset(self, in_stream, **kwargs):
        """
        Creates a dataset by letting the tablib module parse ``in_stream``.

        Extra keyword arguments are passed on to the module's ``import_set``.
        """
        dataset = tablib.Dataset()
        tablib_format = self.get_format()
        tablib_format.import_set(dataset, in_stream, **kwargs)
        return dataset

    def export_data(self, dataset, **kwargs):
        """
        Returns the dataset serialized by the tablib module's ``export_set``.
        """
        tablib_format = self.get_format()
        return tablib_format.export_set(dataset, **kwargs)

    def get_extension(self):
        """
        Returns the first file extension declared by the tablib module.
        """
        # Both 'extentions' and 'extensions' are supported: tablib's master
        # branch uses the misspelled 'extentions' while its dev branch
        # already uses the corrected name.
        # TODO - remove this once the typo is fixed in tablib's master branch
        tablib_format = self.get_format()
        if hasattr(tablib_format, 'extentions'):
            return tablib_format.extentions[0]
        return tablib_format.extensions[0]

    def get_content_type(self):
        """
        Returns the media type configured on the class.
        """
        return self.CONTENT_TYPE

    def can_import(self):
        """
        Importing is possible when the tablib module defines ``import_set``.
        """
        return hasattr(self.get_format(), 'import_set')

    def can_export(self):
        """
        Exporting is possible when the tablib module defines ``export_set``.
        """
        return hasattr(self.get_format(), 'export_set')
class TextFormat(TablibFormat):
    """
    Tablib-backed format whose files are read as text rather than bytes.
    """

    def get_read_mode(self):
        """
        Returns the text read mode for the running Python major version.
        """
        # 'rU' is kept for backwards compatibility with python 2.7
        return 'rU' if sys.version_info[0] < 3 else 'r'

    def is_binary(self):
        """
        Text formats are never binary.
        """
        return False
class CSV(TextFormat):
    """
    Comma-separated values, handled by tablib's ``_csv`` module.
    """
    TABLIB_MODULE = 'tablib.formats._csv'
    CONTENT_TYPE = 'text/csv'

    def create_dataset(self, in_stream, **kwargs):
        """
        Creates a dataset from CSV text.

        On python 2 the input is encoded to UTF-8 before it is parsed.
        """
        if sys.version_info[0] < 3:
            # python 2.7 csv does not do unicode
            in_stream = in_stream.encode('utf-8')
        return super(CSV, self).create_dataset(in_stream, **kwargs)
class JSON(TextFormat):
    """
    JSON documents, handled by tablib's ``_json`` module.
    """
    CONTENT_TYPE = 'application/json'
    TABLIB_MODULE = 'tablib.formats._json'
class YAML(TextFormat):
    """
    YAML documents, handled by tablib's ``_yaml`` module.
    """
    # For the choice of media type see
    # https://stackoverflow.com/questions/332129/yaml-mime-type
    CONTENT_TYPE = 'text/yaml'
    TABLIB_MODULE = 'tablib.formats._yaml'
class TSV(TextFormat):
    """
    Tab-separated values, handled by tablib's ``_tsv`` module.
    """
    TABLIB_MODULE = 'tablib.formats._tsv'
    CONTENT_TYPE = 'text/tab-separated-values'

    def create_dataset(self, in_stream, **kwargs):
        """
        Creates a dataset from TSV text.

        On python 2 the input is encoded to UTF-8 before it is parsed.
        """
        if sys.version_info[0] < 3:
            # python 2.7 csv does not do unicode
            in_stream = in_stream.encode('utf-8')
        return super(TSV, self).create_dataset(in_stream, **kwargs)
class ODS(TextFormat):
    """
    OpenDocument spreadsheets, handled by tablib's ``_ods`` module.
    """
    # NOTE(review): this inherits text-mode reading from TextFormat although
    # OpenDocument files are presumably zip archives - confirm this is
    # intended.
    CONTENT_TYPE = 'application/vnd.oasis.opendocument.spreadsheet'
    TABLIB_MODULE = 'tablib.formats._ods'
class HTML(TextFormat):
    """
    HTML tables, handled by tablib's ``_html`` module.
    """
    CONTENT_TYPE = 'text/html'
    TABLIB_MODULE = 'tablib.formats._html'
class XLS(TablibFormat):
    """
    Excel 'xls' workbooks; exported through tablib, imported through xlrd.
    """
    TABLIB_MODULE = 'tablib.formats._xls'
    CONTENT_TYPE = 'application/vnd.ms-excel'

    def can_import(self):
        """
        Importing depends on xlrd having been found at module load time.
        """
        return XLS_IMPORT

    def create_dataset(self, in_stream):
        """
        Creates a dataset from the first sheet of the workbook.

        ``in_stream`` is handed to xlrd as the raw file contents. The first
        row becomes the dataset headers, every following row a data row.
        """
        assert XLS_IMPORT
        workbook = xlrd.open_workbook(file_contents=in_stream)
        dataset = tablib.Dataset()
        first_sheet = workbook.sheets()[0]
        dataset.headers = first_sheet.row_values(0)
        # Row 0 holds the headers, so data rows start at index 1.
        for row_index in moves.range(1, first_sheet.nrows):
            dataset.append(first_sheet.row_values(row_index))
        return dataset
class XLSX(TablibFormat):
    """
    Excel 'xlsx' workbooks; exported through tablib, imported through
    openpyxl.
    """
    TABLIB_MODULE = 'tablib.formats._xlsx'
    CONTENT_TYPE = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'

    def can_import(self):
        """
        Importing depends on openpyxl having been found at module load time.
        """
        return XLSX_IMPORT

    def create_dataset(self, in_stream):
        """
        Creates a dataset from the active sheet of the workbook.

        ``in_stream`` is wrapped in a BytesIO and opened read-only. The first
        row becomes the dataset headers, every following row a data row.
        """
        assert XLSX_IMPORT
        from io import BytesIO
        workbook = openpyxl.load_workbook(BytesIO(in_stream), read_only=True)
        dataset = tablib.Dataset()
        # ``rows`` is consumed as an iterator: the first item yields the
        # headers and the loop below picks up from the second row.
        row_iter = workbook.active.rows
        dataset.headers = [cell.value for cell in next(row_iter)]
        for row in row_iter:
            dataset.append([cell.value for cell in row])
        return dataset
#: Default formats offered for import and export. Whether a given format can
#: actually be used depends on its implementation in the tablib library.
DEFAULT_FORMATS = (
CSV,
XLS,
XLSX,
TSV,
ODS,
JSON,
YAML,
HTML,
)