debian-tablib/tablib/core.py

1042 lines
28 KiB
Python
Raw Normal View History

2010-07-13 19:42:44 +02:00
# -*- coding: utf-8 -*-
"""
tablib.core
~~~~~~~~~~~

This module implements the central Tablib objects.

:copyright: (c) 2011 by Kenneth Reitz.
:license: MIT, see LICENSE for more details.
"""
2010-08-30 04:41:34 +02:00
2010-11-04 08:55:42 +01:00
from copy import copy
2011-01-11 20:53:59 +01:00
from operator import itemgetter
2010-11-04 08:55:42 +01:00
2010-10-10 09:03:50 +02:00
from tablib import formats
2010-09-25 11:49:14 +02:00
2011-08-16 04:49:14 +02:00
from tablib.compat import OrderedDict, unicode
2011-02-18 09:13:44 +01:00
2010-08-30 04:41:34 +02:00
2010-09-25 10:45:22 +02:00
# Package metadata exposed as module-level dunder attributes.
__title__ = 'tablib'
__version__ = '0.9.11'
__build__ = 0x000911  # hexadecimal build tag; digits mirror __version__
__author__ = 'Kenneth Reitz'
__license__ = 'MIT'
__copyright__ = 'Copyright 2011 Kenneth Reitz'
__docformat__ = 'restructuredtext'
2010-08-29 21:46:57 +02:00
2010-11-18 01:50:05 +01:00
2010-11-04 08:55:42 +01:00
class Row(object):
    """Internal Row object. Mainly used for filtering.

    A row wraps a plain list of cell values (``_row``) together with a list
    of ``tags`` that :meth:`has_tag` matches against.

    :param row: (optional) iterable of cell values; copied into a new list.
    :param tags: (optional) iterable of tags; copied into a new list.
    """

    __slots__ = ['_row', 'tags']

    def __init__(self, row=(), tags=()):
        # Both arguments are copied, so the caller's sequences are never
        # shared with (or mutated through) this row.
        self._row = list(row)
        self.tags = list(tags)

    def __iter__(self):
        return iter(self._row)

    def __len__(self):
        return len(self._row)

    def __repr__(self):
        return repr(self._row)

    def __getslice__(self, i, j):
        # Python 2 slice hook. The previous ``self._row[i,j]`` indexed the
        # list with a tuple, which always raised TypeError.
        return self._row[i:j]

    def __getitem__(self, i):
        return self._row[i]

    def __setitem__(self, i, value):
        self._row[i] = value

    def __delitem__(self, i):
        del self._row[i]

    def __getstate__(self):
        # __slots__ classes have no __dict__, so build the pickle state
        # explicitly from the declared slots.
        return dict((slot, getattr(self, slot)) for slot in self.__slots__)

    def __setstate__(self, state):
        for name, value in state.items():
            setattr(self, name, value)

    def rpush(self, value):
        """Adds ``value`` to the right-hand end of the row."""
        self.insert(len(self._row), value)

    def lpush(self, value):
        """Adds ``value`` to the left-hand start of the row."""
        self.insert(0, value)

    def append(self, value):
        """Adds ``value`` to the end of the row (alias of :meth:`rpush`)."""
        self.rpush(value)

    def insert(self, index, value):
        """Inserts ``value`` before position ``index``."""
        self._row.insert(index, value)

    def __contains__(self, item):
        return item in self._row

    @property
    def tuple(self):
        """Tuple representation of :class:`Row`."""
        return tuple(self._row)

    @property
    def list(self):
        """List representation of :class:`Row`."""
        return list(self._row)

    def has_tag(self, tag):
        """Returns true if current row contains tag.

        :param tag: a single tag (``str``), an iterable of tags (matches if
            any one of them is present), or ``None`` (never matches).
        """
        if tag is None:
            return False
        if isinstance(tag, str):
            return tag in self.tags
        # Any other value is treated as a collection of candidate tags.
        return bool(set(tag) & set(self.tags))
2011-05-11 23:58:31 +02:00
2011-02-18 07:44:59 +01:00
2010-11-04 08:55:42 +01:00
2010-08-29 21:46:57 +02:00
2010-08-30 04:41:34 +02:00
class Dataset(object):
    """The :class:`Dataset` object is the heart of Tablib. It provides all core
    functionality.

    Usually you create a :class:`Dataset` instance in your main module, and append
    rows as you collect data. ::

        data = tablib.Dataset()
        data.headers = ('name', 'age')

        for (name, age) in some_collector():
            data.append((name, age))

    Setting columns is similar. The column data length must equal the
    current height of the data and headers must be set ::

        data = tablib.Dataset()
        data.headers = ('first_name', 'last_name')

        data.append(('John', 'Adams'))
        data.append(('George', 'Washington'))

        data.append_col((90, 67), header='age')

    You can also set rows and headers upon instantiation. This is useful if
    dealing with dozens or hundreds of :class:`Dataset` objects. ::

        headers = ('first_name', 'last_name')
        data = [('John', 'Adams'), ('George', 'Washington')]

        data = tablib.Dataset(*data, headers=headers)

    :param \\*args: (optional) list of rows to populate Dataset
    :param headers: (optional) list strings for Dataset header row
    :param title: (optional) title of the Dataset, used by ``repr()``

    .. admonition:: Format Attributes Definition

        If you look at the code, the various output/import formats are not
        defined within the :class:`Dataset` object. To add support for a new format, see
        :ref:`Adding New Formats <newformats>`.

    """

    def __init__(self, *args, **kwargs):
        self._data = [Row(arg) for arg in args]
        self.__headers = None

        # ('title', index) tuples
        self._separators = []

        # (column, callback) tuples
        self._formatters = []

        # Goes through the validating ``headers`` property setter.
        self.headers = kwargs.get('headers')
        self.title = kwargs.get('title')

        self._register_formats()

    def __len__(self):
        return self.height

    def __getitem__(self, key):
        """Returns a column (list) for a header name, or row tuple(s) for an
        integer index / slice.

        :raises KeyError: if ``key`` is a string that is not a known header.
        """
        if isinstance(key, (str, unicode)):
            # ``self.headers`` may be None; treat that as "no such column"
            # instead of failing with a TypeError on the membership test.
            if self.headers and key in self.headers:
                pos = self.headers.index(key)
                return [row[pos] for row in self._data]
            raise KeyError(key)

        _results = self._data[key]
        if isinstance(_results, Row):
            return _results.tuple
        return [result.tuple for result in _results]

    def __setitem__(self, key, value):
        self._validate(value)
        self._data[key] = Row(value)

    def __delitem__(self, key):
        """Deletes a column (header name) or row(s) (integer index / slice).

        :raises KeyError: if ``key`` is a string that is not a known header.
        """
        if isinstance(key, (str, unicode)):
            if self.headers and key in self.headers:
                pos = self.headers.index(key)
                del self.headers[pos]
                # Rows are mutated in place; no need to re-assign them.
                for row in self._data:
                    del row[pos]
            else:
                raise KeyError(key)
        else:
            del self._data[key]

    def __repr__(self):
        try:
            return '<%s dataset>' % (self.title.lower())
        except AttributeError:
            # title is None (or not a string)
            return '<dataset object>'

    def __unicode__(self):
        """Renders the dataset as a ``|``-separated text table."""
        result = []

        # Only emit a header row (and its underline) when headers exist;
        # previously a headerless dataset crashed on ``map(len, None)``.
        if self.__headers:
            result.append([unicode(header) for header in self.__headers])

        result.extend(list(map(unicode, row)) for row in self._data)

        # here, we calculate max width for each column
        lens = [list(map(len, row)) for row in result]
        field_lens = list(map(max, zip(*lens)))

        # delimiter between header and data
        if self.__headers:
            result.insert(1, ['-' * length for length in field_lens])

        format_string = '|'.join('{%s:%s}' % item for item in enumerate(field_lens))

        return '\n'.join(format_string.format(*row) for row in result)

    def __str__(self):
        return self.__unicode__()

    # ---------
    # Internals
    # ---------

    @classmethod
    def _register_formats(cls):
        """Adds format properties."""
        for fmt in formats.available:
            try:
                try:
                    # Format supports both export and import.
                    setattr(cls, fmt.title, property(fmt.export_set, fmt.import_set))
                except AttributeError:
                    # Export-only format.
                    setattr(cls, fmt.title, property(fmt.export_set))
            except AttributeError:
                # Format exposes neither a title nor a set exporter; skip it.
                pass

    def _validate(self, row=None, col=None, safety=False):
        """Assures size of every row in dataset is of proper proportions.

        :param row: candidate row; must match the current width (if any).
        :param col: candidate column; must match the current height (if any).
        :param safety: when true, return ``False`` instead of raising.
        :raises InvalidDimensions: on a size mismatch, unless ``safety``.

        With neither ``row`` nor ``col`` (or when they are empty/falsy), every
        stored row is checked against the current width instead.
        """
        if row:
            is_valid = (len(row) == self.width) if self.width else True
        elif col:
            if len(col) < 1:
                is_valid = True
            else:
                is_valid = (len(col) == self.height) if self.height else True
        else:
            is_valid = all(len(x) == self.width for x in self._data)

        if is_valid:
            return True
        if not safety:
            raise InvalidDimensions
        return False

    def _package(self, dicts=True, ordered=True):
        """Packages Dataset into lists of dictionaries for transmission.

        :param dicts: with headers set, return one mapping per row when true,
            otherwise a list of rows preceded by the header row.
        :param ordered: use ``OrderedDict`` (true) or ``dict`` for mappings.
        :raises InvalidDatasetIndex: if a formatter targets a missing column.
        """
        # TODO: Dicts default to false?

        dict_pack = OrderedDict if ordered else dict

        if self._formatters:
            # Execute formatters on *copies* of the rows. Formatting the
            # stored rows in place (as before) changed the dataset itself and
            # re-applied every formatter on each subsequent export.
            rows = [list(row) for row in self._data]
            for cells in rows:
                for col, callback in self._formatters:
                    try:
                        if col is None:
                            # None means "every column".
                            cells[:] = [callback(cell) for cell in cells]
                        else:
                            cells[col] = callback(cells[col])
                    except IndexError:
                        raise InvalidDatasetIndex
        else:
            rows = list(self._data)

        if self.headers:
            if dicts:
                data = [dict_pack(list(zip(self.headers, row))) for row in rows]
            else:
                data = [list(self.headers)] + rows
        else:
            data = [list(row) for row in rows]

        return data

    def _get_headers(self):
        """An *optional* list of strings to be used for header rows and attribute names.

        This must be set manually. The given list length must equal :class:`Dataset.width`.

        """
        return self.__headers

    def _set_headers(self, collection):
        """Validating headers setter."""
        self._validate(collection)
        if collection:
            # list() raises TypeError by itself for non-iterables.
            self.__headers = list(collection)
        else:
            self.__headers = None

    headers = property(_get_headers, _set_headers)

    def _get_dict(self):
        """A native Python representation of the :class:`Dataset` object. If headers have
        been set, a list of Python dictionaries will be returned. If no headers have been set,
        a list of lists (rows) will be returned instead.

        A dataset object can also be imported by setting the `Dataset.dict` attribute: ::

            data = tablib.Dataset()
            data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}]

        """
        return self._package()

    def _set_dict(self, pickle):
        """Replaces the dataset content from a native Python structure.

        ``pickle`` is either a list of rows (lists or tuples) or a list of
        dictionaries; in the latter case the keys of the first dictionary
        become the headers. An empty value leaves the dataset untouched.

        :raises InvalidDimensions: if rows differ in size or a dictionary row
            does not carry exactly the header keys.
        :raises UnsupportedFormat: if the first item is neither a list/tuple
            nor a dictionary.
        """
        if not len(pickle):
            return

        first = pickle[0]

        # if list of rows
        if isinstance(first, (list, tuple)):
            self.wipe()
            for row in pickle:
                self.append(Row(row))

        # if list of objects
        elif isinstance(first, dict):
            self.wipe()
            headers = list(first.keys())
            self.headers = headers
            for row in pickle:
                if len(row) != len(headers):
                    raise InvalidDimensions
                try:
                    # Look values up by header so that rows whose keys are
                    # ordered differently still land in the right columns.
                    values = [row[key] for key in headers]
                except KeyError:
                    raise InvalidDimensions
                self.append(Row(values))
        else:
            raise UnsupportedFormat

    dict = property(_get_dict, _set_dict)

    def _clean_col(self, col):
        """Prepares the given column for insert/append.

        Returns the column as a tuple. A column consisting of a single
        callable (after an optional leading header cell when headers are set)
        is expanded by calling it with each stored row.
        """
        col = list(col)

        # Guard against an empty column: popping from it raised IndexError.
        if self.headers and col:
            header = [col.pop(0)]
        else:
            header = []

        if len(col) == 1 and hasattr(col[0], '__call__'):
            col = list(map(col[0], self._data))

        return tuple(header + col)

    @property
    def height(self):
        """The number of rows currently in the :class:`Dataset`.
        Cannot be directly modified.
        """
        return len(self._data)

    @property
    def width(self):
        """The number of columns currently in the :class:`Dataset`.
        Cannot be directly modified.
        """
        try:
            return len(self._data[0])
        except IndexError:
            # No rows yet: fall back to the header count, if any.
            try:
                return len(self.headers)
            except TypeError:
                return 0

    # -------
    # Formats
    # -------
    # The properties below are documentation placeholders only; they are
    # replaced by ``_register_formats`` for every format in
    # ``formats.available``.

    @property
    def xls():
        """A Legacy Excel Spreadsheet representation of the :class:`Dataset` object, with :ref:`separators`. Cannot be set.

        .. note::

            XLS files are limited to a maximum of 65,000 rows. Use :class:`Dataset.xlsx` to avoid this limitation.

        .. admonition:: Binary Warning

             :class:`Dataset.xls` contains binary data, so make sure to write in binary mode::

                with open('output.xls', 'wb') as f:
                    f.write(data.xls)
        """
        pass

    @property
    def xlsx():
        """An Excel '07+ Spreadsheet representation of the :class:`Dataset` object, with :ref:`separators`. Cannot be set.

        .. admonition:: Binary Warning

             :class:`Dataset.xlsx` contains binary data, so make sure to write in binary mode::

                with open('output.xlsx', 'wb') as f:
                    f.write(data.xlsx)
        """
        pass

    @property
    def ods():
        """An OpenDocument Spreadsheet representation of the :class:`Dataset` object, with :ref:`separators`. Cannot be set.

        .. admonition:: Binary Warning

             :class:`Dataset.ods` contains binary data, so make sure to write in binary mode::

                with open('output.ods', 'wb') as f:
                    f.write(data.ods)
        """
        pass

    @property
    def csv():
        """A CSV representation of the :class:`Dataset` object. The top row will contain
        headers, if they have been set. Otherwise, the top row will contain
        the first row of the dataset.

        A dataset object can also be imported by setting the :class:`Dataset.csv` attribute. ::

            data = tablib.Dataset()
            data.csv = 'age, first_name, last_name\\n90, John, Adams'

        Import assumes (for now) that headers exist.

        .. admonition:: Binary Warning

             :class:`Dataset.csv` uses \\r\\n line endings by default, so make
             sure to write in binary mode::

                with open('output.csv', 'wb') as f:
                    f.write(data.csv)

             If you do not do this, and you export the file on Windows, your
             CSV file will open in Excel with a blank line between each row.
        """
        pass

    @property
    def tsv():
        """A TSV representation of the :class:`Dataset` object. The top row will contain
        headers, if they have been set. Otherwise, the top row will contain
        the first row of the dataset.

        A dataset object can also be imported by setting the :class:`Dataset.tsv` attribute. ::

            data = tablib.Dataset()
            data.tsv = 'age\\tfirst_name\\tlast_name\\n90\\tJohn\\tAdams'

        Import assumes (for now) that headers exist.
        """
        pass

    @property
    def yaml():
        """A YAML representation of the :class:`Dataset` object. If headers have been
        set, a YAML list of objects will be returned. If no headers have
        been set, a YAML list of lists (rows) will be returned instead.

        A dataset object can also be imported by setting the :class:`Dataset.yaml` attribute: ::

            data = tablib.Dataset()
            data.yaml = '- {age: 90, first_name: John, last_name: Adams}'

        Import assumes (for now) that headers exist.
        """
        pass

    @property
    def json():
        """A JSON representation of the :class:`Dataset` object. If headers have been
        set, a JSON list of objects will be returned. If no headers have
        been set, a JSON list of lists (rows) will be returned instead.

        A dataset object can also be imported by setting the :class:`Dataset.json` attribute: ::

            data = tablib.Dataset()
            data.json = '[{"age": 90, "first_name": "John", "last_name": "Adams"}]'

        Import assumes (for now) that headers exist.
        """
        pass

    @property
    def html():
        """A HTML table representation of the :class:`Dataset` object. If
        headers have been set, they will be used as table headers.

        .. note:: This method can be used for export only.
        """
        pass

    # ----
    # Rows
    # ----

    def insert(self, index, row, tags=()):
        """Inserts a row to the :class:`Dataset` at the given index.

        Rows inserted must be the correct size (height or width).

        The default behaviour is to insert the given row to the :class:`Dataset`
        object at the given index.

        :param tags: (optional) :ref:`tags <tags>` attached to the row, which
            can later be matched with :class:`filter <Dataset.filter>`.
        :raises InvalidDimensions: if the row does not match the width.
        """
        self._validate(row)
        self._data.insert(index, Row(row, tags=tags))

    def rpush(self, row, tags=()):
        """Adds a row to the end of the :class:`Dataset`.
        See :class:`Dataset.insert` for additional documentation.
        """
        self.insert(self.height, row=row, tags=tags)

    def lpush(self, row, tags=()):
        """Adds a row to the top of the :class:`Dataset`.
        See :class:`Dataset.insert` for additional documentation.
        """
        self.insert(0, row=row, tags=tags)

    def append(self, row, tags=()):
        """Adds a row to the :class:`Dataset`.
        See :class:`Dataset.insert` for additional documentation.
        """
        self.rpush(row, tags)

    def extend(self, rows, tags=()):
        """Adds a list of rows to the :class:`Dataset` using
        :class:`Dataset.append`
        """
        for row in rows:
            self.append(row, tags)

    def lpop(self):
        """Removes and returns the first row of the :class:`Dataset`."""
        cache = self[0]
        del self[0]
        return cache

    def rpop(self):
        """Removes and returns the last row of the :class:`Dataset`."""
        cache = self[-1]
        del self[-1]
        return cache

    def pop(self):
        """Removes and returns the last row of the :class:`Dataset`."""
        return self.rpop()

    # -------
    # Columns
    # -------

    def insert_col(self, index, col=None, header=None):
        """Inserts a column to the :class:`Dataset` at the given index.

        Columns inserted must be the correct height.

        You can also insert a column of a single callable object, which will
        add a new column with the return values of the callable each as an
        item in the column. ::

            data.append_col(col=random.randint)

        If inserting a column, and :class:`Dataset.headers` is set, the
        header attribute must be set, and will be considered the header for
        that row.

        See :ref:`dyncols` for an in-depth example.

        .. versionchanged:: 0.9.0
           If inserting a column, and :class:`Dataset.headers` is set, the
           header attribute must be set, and will be considered the header for
           that row.

        :raises HeadersNeeded: if headers are set but ``header`` is missing.
        :raises InvalidDimensions: if the column does not match the height.
        """
        if col is None:
            col = []

        # Callable Columns...
        if hasattr(col, '__call__'):
            col = list(map(col, self._data))

        col = self._clean_col(col)
        self._validate(col=col)

        # _validate() does not check empty columns; reject them up front when
        # rows exist, before headers are touched (this used to surface as an
        # IndexError after the header had already been inserted).
        if self.height and not len(col):
            raise InvalidDimensions

        if self.headers:
            if not header:
                raise HeadersNeeded()

            # corner case - if header is set without data
            elif self.height == 0 and len(col):
                raise InvalidDimensions

            self.headers.insert(index, header)

        if self.height and self.width:
            for row, value in zip(self._data, col):
                row.insert(index, value)
        else:
            # First column of an empty dataset: one single-cell row per value.
            self._data = [Row([value]) for value in col]

    def rpush_col(self, col, header=None):
        """Adds a column to the end of the :class:`Dataset`.
        See :class:`Dataset.insert_col` for additional documentation.
        """
        self.insert_col(self.width, col, header=header)

    def lpush_col(self, col, header=None):
        """Adds a column to the beginning of the :class:`Dataset`.
        See :class:`Dataset.insert_col` for additional documentation.
        """
        self.insert_col(0, col, header=header)

    def insert_separator(self, index, text='-'):
        """Adds a separator to :class:`Dataset` at given index."""
        sep = (index, text)
        self._separators.append(sep)

    def append_separator(self, text='-'):
        """Adds a :ref:`separator <separators>` to the :class:`Dataset`."""
        # change offsets if headers are or aren't defined
        if not self.headers:
            index = self.height if self.height else 0
        else:
            index = (self.height + 1) if self.height else 1

        self.insert_separator(index, text)

    def append_col(self, col, header=None):
        """Adds a column to the :class:`Dataset`.
        See :class:`Dataset.insert_col` for additional documentation.
        """
        self.rpush_col(col, header)

    def get_col(self, index):
        """Returns the column from the :class:`Dataset` at the given index."""
        return [row[index] for row in self._data]

    # ----
    # Misc
    # ----

    def add_formatter(self, col, handler):
        """Adds a :ref:`formatter` to the :class:`Dataset`.

        .. versionadded:: 0.9.5
           :param col: column to format. Accepts index int or header str;
                       ``None`` applies the handler to every column.
           :param handler: reference to callback function to execute
                           against each cell value.

        :raises KeyError: if ``col`` is a string that is not a known header.
        :raises InvalidDatasetIndex: if the column index exceeds the width.
        """
        if col is None:
            # "All columns". Handled separately because ``None > width``
            # raises TypeError on Python 3.
            self._formatters.append((None, handler))
            return True

        if isinstance(col, (str, unicode)):
            if self.headers and col in self.headers:
                col = self.headers.index(col)  # get 'key' index from each data
            else:
                raise KeyError(col)

        # NOTE(review): ``col == width`` is accepted here although it is not a
        # valid cell index; kept as-is because formatters may be registered
        # before data is added. _package() reports bad indexes at export time.
        if not col > self.width:
            self._formatters.append((col, handler))
        else:
            raise InvalidDatasetIndex

        return True

    def filter(self, tag):
        """Returns a new instance of the :class:`Dataset`, excluding any rows
        that do not contain the given :ref:`tags <tags>`.
        """
        _dset = copy(self)
        _dset._data = [row for row in _dset._data if row.has_tag(tag)]

        return _dset

    def sort(self, col, reverse=False):
        """Sort a :class:`Dataset` by a specific column, given string (for
        header) or integer (for column index). The order can be reversed by
        setting ``reverse`` to ``True``.

        Returns a new :class:`Dataset` instance where rows have been
        sorted.

        :raises HeadersNeeded: if ``col`` is a string but no headers are set.
        """
        if isinstance(col, (str, unicode)):
            if not self.headers:
                raise HeadersNeeded
        elif self.headers:
            # self.dict yields mappings keyed by header, so translate the
            # column index into its header name.
            col = self.headers[col]

        _sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse)
        _dset = Dataset(headers=self.headers)

        for item in _sorted:
            if self.headers:
                row = [item[key] for key in self.headers]
            else:
                row = item
            _dset.append(row=row)

        return _dset

    def transpose(self):
        """Transpose a :class:`Dataset`, turning rows into columns and vice
        versa, returning a new ``Dataset`` instance. The first row of the
        original instance becomes the new header row.

        Returns ``None`` when the dataset has no rows.

        :raises HeadersNeeded: if the dataset has rows but no headers.
        """
        # Don't transpose if there is no data
        if not self:
            return

        # The transposition hinges on the first header; without headers this
        # used to fail with an opaque TypeError.
        if not self.headers:
            raise HeadersNeeded

        _dset = Dataset()
        # The first element of the headers stays in the headers,
        # it is our "hinge" on which we rotate the data
        hinge = self.headers[0]
        _dset.headers = [hinge] + self[hinge]

        for column in self.headers:
            if column == hinge:
                # It's in the headers, so skip it
                continue

            # Adding the column name as now they're a regular column
            row_data = Row([column] + self[column])
            _dset.append(row=row_data)

        return _dset

    def stack(self, other):
        """Stack two :class:`Dataset` instances together by
        joining at the row level, and return new combined
        ``Dataset`` instance.

        Returns ``None`` if ``other`` is not a :class:`Dataset`.

        :raises InvalidDimensions: if the widths differ.
        """
        if not isinstance(other, Dataset):
            return

        if self.width != other.width:
            raise InvalidDimensions

        # Copy the source data
        _dset = copy(self)
        _dset._data = list(_dset._data) + list(other._data)

        return _dset

    def stack_cols(self, other):
        """Stack two :class:`Dataset` instances together by
        joining at the column level, and return a new
        combined ``Dataset`` instance. If either ``Dataset``
        has headers set, than the other must as well.

        Returns ``None`` if ``other`` is not a :class:`Dataset`.

        :raises HeadersNeeded: if only one of the datasets has headers.
        :raises InvalidDimensions: if the heights differ.
        """
        if not isinstance(other, Dataset):
            return

        if self.headers or other.headers:
            if not self.headers or not other.headers:
                raise HeadersNeeded

        if self.height != other.height:
            raise InvalidDimensions

        try:
            new_headers = self.headers + other.headers
        except TypeError:
            # Neither dataset has headers.
            new_headers = None

        _dset = Dataset()

        # Walk the columns by position rather than by header name: this also
        # works for headerless datasets (iterating ``None`` used to raise
        # TypeError) and for datasets with repeated header names.
        for source in (self, other):
            for index in range(source.width):
                _dset.append_col(col=source.get_col(index))

        _dset.headers = new_headers

        return _dset

    def wipe(self):
        """Removes all content and headers from the :class:`Dataset` object."""
        self._data = list()
        self.__headers = None
2010-08-30 11:31:45 +02:00
2010-09-08 23:35:13 +02:00
2011-03-23 05:20:39 +01:00
2010-09-20 20:18:18 +02:00
class Databook(object):
    """A book of :class:`Dataset` objects.

    :param sets: (optional) list of :class:`Dataset` objects to start with.
        The list is stored as given, not copied.
    """

    def __init__(self, sets=None):
        if sets is None:
            self._datasets = list()
        else:
            self._datasets = sets

        self._register_formats()

    def __repr__(self):
        try:
            return '<%s databook>' % (self.title.lower())
        except AttributeError:
            # No (string) ``title`` attribute on this instance.
            return '<databook object>'

    def wipe(self):
        """Removes all :class:`Dataset` objects from the :class:`Databook`."""
        self._datasets = []

    @classmethod
    def _register_formats(cls):
        """Adds format properties."""
        for fmt in formats.available:
            try:
                try:
                    # Format supports both export and import of books.
                    setattr(cls, fmt.title, property(fmt.export_book, fmt.import_book))
                except AttributeError:
                    # Export-only format.
                    setattr(cls, fmt.title, property(fmt.export_book))
            except AttributeError:
                # Format has no book exporter (or no title); skip it.
                pass

    def add_sheet(self, dataset):
        """Adds given :class:`Dataset` to the :class:`Databook`.

        :raises InvalidDatasetType: if ``dataset`` is not a :class:`Dataset`.
        """
        if isinstance(dataset, Dataset):
            self._datasets.append(dataset)
        else:
            raise InvalidDatasetType

    def _package(self, ordered=True):
        """Packages :class:`Databook` for delivery.

        Returns one mapping per dataset with the keys ``title`` and ``data``
        (in that order when ``ordered`` is true).
        """
        dict_pack = OrderedDict if ordered else dict

        collector = []
        for dset in self._datasets:
            # Pass ordered pairs rather than keyword arguments: keyword
            # order is not guaranteed to be preserved on older interpreters,
            # which defeats the purpose of OrderedDict.
            collector.append(dict_pack([
                ('title', dset.title),
                ('data', dset._package(ordered=ordered)),
            ]))
        return collector

    @property
    def size(self):
        """The number of the :class:`Dataset` objects within :class:`Databook`."""
        return len(self._datasets)
2010-09-08 23:35:13 +02:00
2010-09-14 05:25:49 +02:00
2010-09-26 00:03:03 +02:00
def detect(stream):
    """Return (format, stream) of given stream.

    Each format in ``formats.available`` is asked in turn; the first one
    whose ``detect`` returns a true value wins. ``(None, stream)`` is
    returned when no format matches.
    """
    for candidate in formats.available:
        try:
            matched = candidate.detect(stream)
        except AttributeError:
            # Format cannot detect (or detection hit an AttributeError).
            continue
        if matched:
            return (candidate, stream)
    return (None, stream)
2010-11-17 22:51:43 +01:00
def import_set(stream):
    """Return dataset of given stream.

    Returns ``None`` when the format is not detected or the detected
    format raises ``AttributeError`` while importing.
    """
    fmt, stream = detect(stream)

    try:
        dataset = Dataset()
        fmt.import_set(dataset, stream)
    except AttributeError:
        return None
    return dataset
2010-09-14 05:25:49 +02:00
def import_book(stream):
    """Return databook of given stream.

    Returns ``None`` when the format is not detected or the detected
    format raises ``AttributeError`` while importing (for example because
    it has no ``import_book``).
    """
    # ``fmt`` rather than ``format`` to avoid shadowing the builtin.
    fmt, stream = detect(stream)

    try:
        databook = Databook()
        fmt.import_book(databook, stream)
        return databook
    except AttributeError:
        return None
2010-09-08 23:35:13 +02:00
class InvalidDatasetType(Exception):
    """Only Datasets can be added to a DataBook"""
2010-09-08 23:35:13 +02:00
2010-09-14 05:25:49 +02:00
2010-08-30 04:41:34 +02:00
class InvalidDimensions(Exception):
    """Invalid size"""
2011-05-11 23:58:31 +02:00
class InvalidDatasetIndex(Exception):
    """Outside of Dataset size"""
2010-08-30 04:41:34 +02:00
class HeadersNeeded(Exception):
    """Header parameter must be given when appending a column in this Dataset."""
2010-08-30 07:39:38 +02:00
class UnsupportedFormat(NotImplementedError):
    """Format is not supported"""