Changes for Python 3 compatibility, including vendorizing xlrd3

Marc Abramowitz 2012-05-15 07:19:15 -07:00
parent 6a825a8a39
commit a3781e3c89
14 changed files with 8036 additions and 11 deletions

View File

@@ -49,13 +49,20 @@ setup(
'tablib', 'tablib.formats',
'tablib.packages',
'tablib.packages.xlwt',
'tablib.packages.xlwt3',
'tablib.packages.xlrd',
'tablib.packages.xlrd3',
'tablib.packages.omnijson',
'tablib.packages.odf',
'tablib.packages.odf3',
'tablib.packages.openpyxl',
'tablib.packages.openpyxl.shared',
'tablib.packages.openpyxl.reader',
'tablib.packages.openpyxl.writer',
'tablib.packages.openpyxl3',
'tablib.packages.openpyxl3.shared',
'tablib.packages.openpyxl3.reader',
'tablib.packages.openpyxl3.writer',
'tablib.packages.yaml',
'tablib.packages.unicodecsv'
],

View File

@@ -23,6 +23,8 @@ except ImportError:
if is_py3:
from io import BytesIO
import tablib.packages.xlwt3 as xlwt
import tablib.packages.xlrd3 as xlrd
from tablib.packages.xlrd3.biffh import XLRDError
from tablib.packages import markup3 as markup
from tablib.packages import openpyxl3 as openpyxl
from tablib.packages.odf3 import opendocument, style, text, table
@@ -40,6 +42,7 @@ else:
from cStringIO import StringIO
import tablib.packages.xlwt as xlwt
import tablib.packages.xlrd as xlrd
from tablib.packages.xlrd.biffh import XLRDError
from tablib.packages import markup
from itertools import ifilter
from tablib.packages import openpyxl
@@ -47,4 +50,4 @@ else:
from tablib.packages import unicodecsv as csv
unicode = unicode
unicode = unicode
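The conditional import block above is the single switching point for all of the vendored backends: format modules pull xlrd, xlwt, openpyxl, and XLRDError from tablib.compat and transparently get the Python 2 or Python 3 copy. A minimal sketch of that pattern from the consumer side (illustrative only; it assumes the vendored xlrd/xlrd3 keep xlrd's open_workbook(file_contents=...) API):

from tablib.compat import xlrd, XLRDError

def first_sheet_rows(xls_bytes):
    """Yield the rows of the first worksheet from raw .xls bytes."""
    try:
        book = xlrd.open_workbook(file_contents=xls_bytes)
    except XLRDError:
        raise ValueError('not a readable Excel (.xls) payload')
    sheet = book.sheet_by_index(0)
    for rowx in range(sheet.nrows):
        yield sheet.row_values(rowx)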

View File

@@ -226,19 +226,21 @@ class Dataset(object):
def __unicode__(self):
result = [self.__headers]
result.extend(map(unicode, row) for row in self._data)
result.extend(list(map(unicode, row)) for row in self._data)
# here, we calculate max width for each column
lens = (map(len, row) for row in result)
field_lens = map(max, zip(*lens))
lens = (list(map(len, row)) for row in result)
field_lens = list(map(max, zip(*lens)))
# delimiter between header and data
result.insert(1, [u'-' * length for length in field_lens])
result.insert(1, ['-' * length for length in field_lens])
format_string = u'|'.join(u'{%s:%s}' % item for item in enumerate(field_lens))
format_string = '|'.join('{%s:%s}' % item for item in enumerate(field_lens))
return u'\n'.join(format_string.format(*row) for row in result)
return '\n'.join(format_string.format(*row) for row in result)
def __str__(self):
return self.__unicode__()
# ---------
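The list() wrappers above are the substance of this hunk: on Python 3, map() returns a one-shot iterator, and field_lens is consumed twice (once for the separator row, once to build the format string), so it has to be materialised. A quick illustration of the difference:

widths = map(len, ['first_name', 'last_name', 'gpa'])
print(max(widths))    # 10 on both Python 2 and Python 3
print(list(widths))   # [10, 9, 3] on Python 2; [] on Python 3 -- the iterator is spent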

View File

@@ -5,9 +5,7 @@
import sys
from tablib.compat import BytesIO, xlwt
from tablib.packages import xlrd
from tablib.packages.xlrd.biffh import XLRDError
from tablib.compat import BytesIO, xlwt, xlrd, XLRDError
import tablib
title = 'xls'

File diff suppressed because it is too large

View File

@@ -0,0 +1,620 @@
# Support module for the xlrd3 package.
#
# Portions copyright (c) 2005-2008 Stephen John Machin, Lingfo Pty Ltd
# This module is part of the xlrd package, which is released under a
# BSD-style licence.
#
# 2010-12-08 mozman refactoring for python 3
# 2008-02-10 SJM BIFF2 BLANK record
# 2008-02-08 SJM Preparation for Excel 2.0 support
# 2008-02-02 SJM Added suffixes (_B2, _B2_ONLY, etc) on record names for
# biff_dump & biff_count
# 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files.
# 2007-09-08 SJM Avoid crash when zero-length Unicode string missing options byte.
# 2007-04-22 SJM Remove experimental "trimming" facility.
import sys
from struct import unpack
encoding_from_codepage = {
1200 : 'utf_16_le',
10000: 'mac_roman',
10006: 'mac_greek', # guess
10007: 'mac_cyrillic', # guess
10029: 'mac_latin2', # guess
10079: 'mac_iceland', # guess
10081: 'mac_turkish', # guess
32768: 'mac_roman',
32769: 'cp1252',
}
# some more guessing, for Indic scripts
# codepage 57000 range:
# 2 Devanagari [0]
# 3 Bengali [1]
# 4 Tamil [5]
# 5 Telugu [6]
# 6 Assamese [1] c.f. Bengali
# 7 Oriya [4]
# 8 Kannada [7]
# 9 Malayalam [8]
# 10 Gujarati [3]
# 11 Gurmukhi [2]
FUN = 0 # unknown
FDT = 1 # date
FNU = 2 # number
FGE = 3 # general
FTX = 4 # text
DATEFORMAT = FDT
NUMBERFORMAT = FNU
XL_CELL_EMPTY = 0
XL_CELL_TEXT = 1
XL_CELL_NUMBER = 2
XL_CELL_DATE = 3
XL_CELL_BOOLEAN = 4
XL_CELL_ERROR = 5
XL_CELL_BLANK = 6 # for use in debugging, gathering stats, etc
biff_text_from_num = {
0: "(not BIFF)",
20: "2.0",
21: "2.1",
30: "3",
40: "4S",
45: "4W",
50: "5",
70: "7",
80: "8",
85: "8X",
}
# This dictionary can be used to produce a text version of the internal codes
# that Excel uses for error cells. Here are its contents:
error_text_from_code = {
0x00: '#NULL!', # Intersection of two cell ranges is empty
0x07: '#DIV/0!', # Division by zero
0x0F: '#VALUE!', # Wrong type of operand
0x17: '#REF!', # Illegal or deleted cell reference
0x1D: '#NAME?', # Wrong function or range name
0x24: '#NUM!', # Value range overflow
0x2A: '#N/A!', # Argument or function not available
}
BIFF_FIRST_UNICODE = 80
XL_WORKBOOK_GLOBALS = WBKBLOBAL = 0x5
XL_WORKBOOK_GLOBALS_4W = 0x100
XL_WORKSHEET = WRKSHEET = 0x10
XL_BOUNDSHEET_WORKSHEET = 0x00
XL_BOUNDSHEET_CHART = 0x02
XL_BOUNDSHEET_VB_MODULE = 0x06
# XL_RK2 = 0x7e
XL_ARRAY = 0x0221
XL_ARRAY2 = 0x0021
XL_BLANK = 0x0201
XL_BLANK_B2 = 0x01
XL_BOF = 0x809
XL_BOOLERR = 0x205
XL_BOOLERR_B2 = 0x5
XL_BOUNDSHEET = 0x85
XL_BUILTINFMTCOUNT = 0x56
XL_CF = 0x01B1
XL_CODEPAGE = 0x42
XL_COLINFO = 0x7D
XL_COLUMNDEFAULT = 0x20 # BIFF2 only
XL_COLWIDTH = 0x24 # BIFF2 only
XL_CONDFMT = 0x01B0
XL_CONTINUE = 0x3c
XL_COUNTRY = 0x8C
XL_DATEMODE = 0x22
XL_DEFAULTROWHEIGHT = 0x0225
XL_DEFCOLWIDTH = 0x55
XL_DIMENSION = 0x200
XL_DIMENSION2 = 0x0
XL_EFONT = 0x45
XL_EOF = 0x0a
XL_EXTERNNAME = 0x23
XL_EXTERNSHEET = 0x17
XL_EXTSST = 0xff
XL_FEAT11 = 0x872
XL_FILEPASS = 0x2f
XL_FONT = 0x31
XL_FONT_B3B4 = 0x231
XL_FORMAT = 0x41e
XL_FORMAT2 = 0x1E # BIFF2, BIFF3
XL_FORMULA = 0x6
XL_FORMULA3 = 0x206
XL_FORMULA4 = 0x406
XL_GCW = 0xab
XL_INDEX = 0x20b
XL_INTEGER = 0x2 # BIFF2 only
XL_IXFE = 0x44 # BIFF2 only
XL_LABEL = 0x204
XL_LABEL_B2 = 0x04
XL_LABELRANGES = 0x15f
XL_LABELSST = 0xfd
XL_MERGEDCELLS = 0xE5
XL_MSO_DRAWING = 0x00EC
XL_MSO_DRAWING_GROUP = 0x00EB
XL_MSO_DRAWING_SELECTION = 0x00ED
XL_MULRK = 0xbd
XL_MULBLANK = 0xbe
XL_NAME = 0x18
XL_NOTE = 0x1c
XL_NUMBER = 0x203
XL_NUMBER_B2 = 0x3
XL_OBJ = 0x5D
XL_PALETTE = 0x92
XL_RK = 0x27e
XL_ROW = 0x208
XL_ROW_B2 = 0x08
XL_RSTRING = 0xd6
XL_SHEETHDR = 0x8F # BIFF4W only
XL_SHEETSOFFSET = 0x8E # BIFF4W only
XL_SHRFMLA = 0x04bc
XL_SST = 0xfc
XL_STANDARDWIDTH = 0x99
XL_STRING = 0x207
XL_STRING_B2 = 0x7
XL_STYLE = 0x293
XL_SUPBOOK = 0x1AE
XL_TABLEOP = 0x236
XL_TABLEOP2 = 0x37
XL_TABLEOP_B2 = 0x36
XL_TXO = 0x1b6
XL_UNCALCED = 0x5e
XL_UNKNOWN = 0xffff
XL_WINDOW2 = 0x023E
XL_WRITEACCESS = 0x5C
XL_XF = 0xe0
XL_XF2 = 0x0043 # BIFF2 version of XF record
XL_XF3 = 0x0243 # BIFF3 version of XF record
XL_XF4 = 0x0443 # BIFF4 version of XF record
boflen = {
0x0809: 8,
0x0409: 6,
0x0209: 6,
0x0009: 4,
}
bofcodes = (0x0809, 0x0409, 0x0209, 0x0009)
XL_FORMULA_OPCODES = (0x0006, 0x0406, 0x0206)
_cell_opcode_list = (
XL_BOOLERR,
XL_FORMULA,
XL_FORMULA3,
XL_FORMULA4,
XL_LABEL,
XL_LABELSST,
XL_MULRK,
XL_NUMBER,
XL_RK,
XL_RSTRING,
)
biff_rec_name_dict = {
0x0000: 'DIMENSIONS_B2',
0x0001: 'BLANK_B2',
0x0002: 'INTEGER_B2_ONLY',
0x0003: 'NUMBER_B2',
0x0004: 'LABEL_B2',
0x0005: 'BOOLERR_B2',
0x0006: 'FORMULA',
0x0007: 'STRING_B2',
0x0008: 'ROW_B2',
0x0009: 'BOF_B2',
0x000A: 'EOF',
0x000B: 'INDEX_B2_ONLY',
0x000C: 'CALCCOUNT',
0x000D: 'CALCMODE',
0x000E: 'PRECISION',
0x000F: 'REFMODE',
0x0010: 'DELTA',
0x0011: 'ITERATION',
0x0012: 'PROTECT',
0x0013: 'PASSWORD',
0x0014: 'HEADER',
0x0015: 'FOOTER',
0x0016: 'EXTERNCOUNT',
0x0017: 'EXTERNSHEET',
0x0018: 'NAME_B2,5+',
0x0019: 'WINDOWPROTECT',
0x001A: 'VERTICALPAGEBREAKS',
0x001B: 'HORIZONTALPAGEBREAKS',
0x001C: 'NOTE',
0x001D: 'SELECTION',
0x001E: 'FORMAT_B2-3',
0x001F: 'BUILTINFMTCOUNT_B2',
0x0020: 'COLUMNDEFAULT_B2_ONLY',
0x0021: 'ARRAY_B2_ONLY',
0x0022: 'DATEMODE',
0x0023: 'EXTERNNAME',
0x0024: 'COLWIDTH_B2_ONLY',
0x0025: 'DEFAULTROWHEIGHT_B2_ONLY',
0x0026: 'LEFTMARGIN',
0x0027: 'RIGHTMARGIN',
0x0028: 'TOPMARGIN',
0x0029: 'BOTTOMMARGIN',
0x002A: 'PRINTHEADERS',
0x002B: 'PRINTGRIDLINES',
0x002F: 'FILEPASS',
0x0031: 'FONT',
0x0032: 'FONT2_B2_ONLY',
0x0036: 'TABLEOP_B2',
0x0037: 'TABLEOP2_B2',
0x003C: 'CONTINUE',
0x003D: 'WINDOW1',
0x003E: 'WINDOW2_B2',
0x0040: 'BACKUP',
0x0041: 'PANE',
0x0042: 'CODEPAGE',
0x0043: 'XF_B2',
0x0044: 'IXFE_B2_ONLY',
0x0045: 'EFONT_B2_ONLY',
0x004D: 'PLS',
0x0051: 'DCONREF',
0x0055: 'DEFCOLWIDTH',
0x0056: 'BUILTINFMTCOUNT_B3-4',
0x0059: 'XCT',
0x005A: 'CRN',
0x005B: 'FILESHARING',
0x005C: 'WRITEACCESS',
0x005D: 'OBJECT',
0x005E: 'UNCALCED',
0x005F: 'SAVERECALC',
0x0063: 'OBJECTPROTECT',
0x007D: 'COLINFO',
0x007E: 'RK2_mythical_?',
0x0080: 'GUTS',
0x0081: 'WSBOOL',
0x0082: 'GRIDSET',
0x0083: 'HCENTER',
0x0084: 'VCENTER',
0x0085: 'BOUNDSHEET',
0x0086: 'WRITEPROT',
0x008C: 'COUNTRY',
0x008D: 'HIDEOBJ',
0x008E: 'SHEETSOFFSET',
0x008F: 'SHEETHDR',
0x0090: 'SORT',
0x0092: 'PALETTE',
0x0099: 'STANDARDWIDTH',
0x009B: 'FILTERMODE',
0x009C: 'FNGROUPCOUNT',
0x009D: 'AUTOFILTERINFO',
0x009E: 'AUTOFILTER',
0x00A0: 'SCL',
0x00A1: 'SETUP',
0x00AB: 'GCW',
0x00BD: 'MULRK',
0x00BE: 'MULBLANK',
0x00C1: 'MMS',
0x00D6: 'RSTRING',
0x00D7: 'DBCELL',
0x00DA: 'BOOKBOOL',
0x00DD: 'SCENPROTECT',
0x00E0: 'XF',
0x00E1: 'INTERFACEHDR',
0x00E2: 'INTERFACEEND',
0x00E5: 'MERGEDCELLS',
0x00E9: 'BITMAP',
0x00EB: 'MSO_DRAWING_GROUP',
0x00EC: 'MSO_DRAWING',
0x00ED: 'MSO_DRAWING_SELECTION',
0x00EF: 'PHONETIC',
0x00FC: 'SST',
0x00FD: 'LABELSST',
0x00FF: 'EXTSST',
0x013D: 'TABID',
0x015F: 'LABELRANGES',
0x0160: 'USESELFS',
0x0161: 'DSF',
0x01AE: 'SUPBOOK',
0x01AF: 'PROTECTIONREV4',
0x01B0: 'CONDFMT',
0x01B1: 'CF',
0x01B2: 'DVAL',
0x01B6: 'TXO',
0x01B7: 'REFRESHALL',
0x01B8: 'HLINK',
0x01BC: 'PASSWORDREV4',
0x01BE: 'DV',
0x01C0: 'XL9FILE',
0x01C1: 'RECALCID',
0x0200: 'DIMENSIONS',
0x0201: 'BLANK',
0x0203: 'NUMBER',
0x0204: 'LABEL',
0x0205: 'BOOLERR',
0x0206: 'FORMULA_B3',
0x0207: 'STRING',
0x0208: 'ROW',
0x0209: 'BOF',
0x020B: 'INDEX_B3+',
0x0218: 'NAME',
0x0221: 'ARRAY',
0x0223: 'EXTERNNAME_B3-4',
0x0225: 'DEFAULTROWHEIGHT',
0x0231: 'FONT_B3B4',
0x0236: 'TABLEOP',
0x023E: 'WINDOW2',
0x0243: 'XF_B3',
0x027E: 'RK',
0x0293: 'STYLE',
0x0406: 'FORMULA_B4',
0x0409: 'BOF',
0x041E: 'FORMAT',
0x0443: 'XF_B4',
0x04BC: 'SHRFMLA',
0x0800: 'QUICKTIP',
0x0809: 'BOF',
0x0862: 'SHEETLAYOUT',
0x0867: 'SHEETPROTECTION',
0x0868: 'RANGEPROTECTION',
}
class XLRDError(Exception):
pass
class BaseObject:
"""
Parent of almost all other classes in the package. Defines a common
'dump' method for debugging.
"""
_repr_these = []
def dump(self, f=None, header=None, footer=None, indent=0):
"""
:param f: open file object, to which the dump is written
:param header: text to write before the dump
:param footer: text to write after the dump
:param indent: number of leading spaces (for recursive calls)
"""
if f is None:
f = sys.stderr
pad = " " * indent
if header is not None:
print(header, file=f)
for attr, value in sorted(self.__dict__.items()):
if getattr(value, 'dump', None) and attr != 'book':
value.dump(f,
header="%s%s (%s object):" % (pad, attr, value.__class__.__name__),
indent=indent+4)
elif attr not in self._repr_these and \
(isinstance(value, list) or
isinstance(value, dict)):
print("%s%s: %s, len = %d" % (pad, attr, type(value), len(value)), file=f)
else:
print("%s%s: %r" % (pad, attr, value), file=f)
if footer is not None:
print(footer, file=f)
def fprintf(f, fmt, *vargs):
print(fmt.rstrip('\n') % vargs, file=f)
def upkbits(tgt_obj, src, manifest, local_setattr=setattr):
for n, mask, attr in manifest:
local_setattr(tgt_obj, attr, (src & mask) >> n)
def upkbitsL(tgt_obj, src, manifest, local_setattr=setattr, local_int=int):
for n, mask, attr in manifest:
local_setattr(tgt_obj, attr, local_int((src & mask) >> n))
def unpack_string(data, pos, encoding, lenlen=1):
nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
pos += lenlen
return str(data[pos:pos+nchars], encoding)
def unpack_string_update_pos(data, pos, encoding, lenlen=1, known_len=None):
if known_len is not None:
# On a NAME record, the length byte is detached from the front of the string.
nchars = known_len
else:
nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
pos += lenlen
newpos = pos + nchars
return (str(data[pos:newpos], encoding), newpos)
def unpack_unicode(data, pos, lenlen=2):
""" Return unicode_strg """
nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
if not nchars:
# Ambiguous whether 0-length string should have an "options" byte.
# Avoid crash if missing.
return ""
pos += lenlen
options = data[pos]
pos += 1
if options & 0x08: # richtext
pos += 2
if options & 0x04: # phonetic
pos += 4
if options & 0x01:
# Uncompressed UTF-16-LE
rawstrg = data[pos:pos+2*nchars]
strg = str(rawstrg, 'utf_16_le')
else:
# Note: this is COMPRESSED (not ASCII!) encoding!!!
# Merely returning the raw bytes would work OK 99.99% of the time
# if the local codepage was cp1252 -- however this would rapidly go pear-shaped
# for other codepages so we grit our Anglocentric teeth and return Unicode :-)
strg = str(data[pos:pos+nchars], "latin_1")
return strg
def unpack_unicode_update_pos(data, pos, lenlen=2, known_len=None):
""" Return (unicode_strg, updated value of pos) """
if known_len is not None:
# On a NAME record, the length byte is detached from the front of the string.
nchars = known_len
else:
nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
pos += lenlen
if not nchars and not data[pos:]:
# Zero-length string with no options byte
return ("", pos)
options = data[pos]
pos += 1
phonetic = options & 0x04
richtext = options & 0x08
if richtext:
rt = unpack('<H', data[pos:pos+2])[0]
pos += 2
if phonetic:
sz = unpack('<i', data[pos:pos+4])[0]
pos += 4
if options & 0x01:
# Uncompressed UTF-16-LE
strg = str(data[pos:pos+2*nchars], 'utf_16_le')
pos += 2*nchars
else:
# Note: this is COMPRESSED (not ASCII!) encoding!!!
strg = str(data[pos:pos+nchars], "latin_1")
pos += nchars
if richtext:
pos += 4 * rt
if phonetic:
pos += sz
return (strg, pos)
def unpack_cell_range_address_list_update_pos(
output_list, data, pos, biff_version, addr_size=6):
# output_list is updated in situ
if biff_version < 80:
assert addr_size == 6
else:
assert addr_size in (6, 8)
n, = unpack("<H", data[pos:pos+2])
pos += 2
if n:
fmt = "<HHBB" if addr_size == 6 else "<HHHH"
for _unused in range(n):
ra, rb, ca, cb = unpack(fmt, data[pos:pos+addr_size])
output_list.append((ra, rb+1, ca, cb+1))
pos += addr_size
return pos
def hex_char_dump(strg, ofs, dlen, base=0, fout=sys.stdout, unnumbered=False):
endpos = min(ofs + dlen, len(strg))
pos = ofs
numbered = not unnumbered
num_prefix = ''
while pos < endpos:
endsub = min(pos + 16, endpos)
substrg = strg[pos:endsub]
lensub = endsub - pos
if lensub <= 0 or lensub != len(substrg):
fprintf(
sys.stdout,
'??? hex_char_dump: ofs=%d dlen=%d base=%d -> endpos=%d pos=%d endsub=%d substrg=%r\n',
ofs, dlen, base, endpos, pos, endsub, substrg)
break
hexd = ''.join(["%02x " % c for c in substrg])
chard = ''
for c in substrg:
if c == ord('\0'):
c = '~'
elif not (' ' <= chr(c) <= '~'):
c = '?'
if isinstance(c, int):
c = chr(c)
chard += c
if numbered:
num_prefix = "%5d: " % (base+pos-ofs)
fprintf(fout, "%s %-48s %s\n", num_prefix, hexd, chard)
pos = endsub
def biff_dump(mem, stream_offset, stream_len, base=0, fout=sys.stdout,
unnumbered=False):
pos = stream_offset
stream_end = stream_offset + stream_len
adj = base - stream_offset
dummies = 0
numbered = not unnumbered
num_prefix = ''
while stream_end - pos >= 4:
rc, length = unpack('<HH', mem[pos:pos+4])
if rc == 0 and length == 0:
if mem[pos:] == b'\0' * (stream_end - pos):
dummies = stream_end - pos
savpos = pos
pos = stream_end
break
if dummies:
dummies += 4
else:
savpos = pos
dummies = 4
pos += 4
else:
if dummies:
if numbered:
num_prefix = "%5d: " % (adj + savpos)
fprintf(fout, "%s---- %d zero bytes skipped ----\n",
num_prefix, dummies)
dummies = 0
recname = biff_rec_name_dict.get(rc, '<UNKNOWN>')
if numbered:
num_prefix = "%5d: " % (adj + pos)
fprintf(fout, "%s%04x %s len = %04x (%d)\n",
num_prefix, rc, recname, length, length)
pos += 4
hex_char_dump(mem, pos, length, adj+pos, fout, unnumbered)
pos += length
if dummies:
if numbered:
num_prefix = "%5d: " % (adj + savpos)
fprintf(fout, "%s---- %d zero bytes skipped ----\n", num_prefix, dummies)
if pos < stream_end:
if numbered:
num_prefix = "%5d: " % (adj + pos)
fprintf(fout, "%s---- Misc bytes at end ----\n", num_prefix)
hex_char_dump(mem, pos, stream_end-pos, adj + pos, fout, unnumbered)
elif pos > stream_end:
fprintf(fout, "Last dumped record has length (%d) that is too large\n", length)
def biff_count_records(mem, stream_offset, stream_len, fout=sys.stdout):
pos = stream_offset
stream_end = stream_offset + stream_len
tally = {}
while stream_end - pos >= 4:
rc, length = unpack('<HH', mem[pos:pos+4])
if rc == 0 and length == 0:
if mem[pos:] == b'\0' * (stream_end - pos):
break
recname = "<Dummy (zero)>"
else:
recname = biff_rec_name_dict.get(rc, None)
if recname is None:
recname = "Unknown_0x%04X" % rc
if recname in tally:
tally[recname] += 1
else:
tally[recname] = 1
pos += length + 4
for recname, count in sorted(tally.items()):
fprintf(fout, "%8d %s", count, recname)

View File

@@ -0,0 +1,346 @@
# Implements the minimal functionality required
# to extract a "Workbook" or "Book" stream (as one big string)
# from an OLE2 Compound Document file.
#
# Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd
# This module is part of the xlrd3 package, which is released under a BSD-style licence.
# No part of the content of this file was derived from the works of David Giffin.
# 2008-11-04 SJM Avoid assertion error when -1 used instead of -2 for first_SID of empty SCSS [Frank Hoffsuemmer]
# 2007-09-08 SJM Warning message if sector sizes are extremely large.
# 2007-05-07 SJM Meaningful exception instead of IndexError if a SAT (sector allocation table) is corrupted.
# 2007-04-22 SJM Missing "<" in a struct.unpack call => can't open files on bigendian platforms.
import sys
from struct import unpack
# Magic cookie that should appear in the first 8 bytes of the file.
SIGNATURE = b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"
EOCSID = -2
FREESID = -1
SATSID = -3
MSATSID = -4
def int_floor_div(x, y):
return divmod(x, y)[0]
class CompDocError(Exception):
pass
class DirNode(object):
def __init__(self, DID, dent, DEBUG=0):
# dent is the 128-byte directory entry
self.DID = DID
# (cbufsize, self.etype, self.colour, self.left_DID, self.right_DID,
# self.root_DID,
# self.first_SID,
# self.tot_size) = \
# unpack('<HBBiii16x4x8x8xii4x', dent[64:128])
(cbufsize, self.etype, self.colour, self.left_DID, self.right_DID,
self.root_DID) = \
unpack('<HBBiii', dent[64:80])
(self.first_SID, self.tot_size) = \
unpack('<ii', dent[116:124])
if cbufsize == 0:
self.name = ''
else:
self.name = str(dent[0:cbufsize-2], 'utf_16_le') # omit the trailing U+0000
self.children = [] # filled in later
self.parent = -1 # indicates orphan; fixed up later
self.tsinfo = unpack('<IIII', dent[100:116])
if DEBUG:
self.dump(DEBUG)
def dump(self, DEBUG=1):
print("DID=%d name=%r etype=%d DIDs(left=%d right=%d root=%d parent=%d kids=%r) first_SID=%d tot_size=%d" \
% (self.DID, self.name, self.etype, self.left_DID,
self.right_DID, self.root_DID, self.parent, self.children, self.first_SID, self.tot_size))
if DEBUG == 2:
# cre_lo, cre_hi, mod_lo, mod_hi = tsinfo
print("timestamp info", self.tsinfo)
def _build_family_tree(dirlist, parent_DID, child_DID):
if child_DID < 0: return
_build_family_tree(dirlist, parent_DID, dirlist[child_DID].left_DID)
dirlist[parent_DID].children.append(child_DID)
dirlist[child_DID].parent = parent_DID
_build_family_tree(dirlist, parent_DID, dirlist[child_DID].right_DID)
if dirlist[child_DID].etype == 1: # storage
_build_family_tree(dirlist, child_DID, dirlist[child_DID].root_DID)
# Compound document handler.
# @param mem The raw contents of the file, as a string, or as an mmap.mmap() object. The
# only operation it needs to support is slicing.
class CompDoc(object):
def __init__(self, mem, logfile=sys.stdout, DEBUG=0):
self.logfile = logfile
if mem[0:8] != SIGNATURE:
raise CompDocError('Not an OLE2 compound document')
if mem[28:30] != b'\xFE\xFF':
raise CompDocError('Expected "little-endian" marker, found %r' % mem[28:30])
revision, version = unpack('<HH', mem[24:28])
if DEBUG:
print("\nCompDoc format: version=0x%04x revision=0x%04x" % (version, revision), file=logfile)
self.mem = mem
ssz, sssz = unpack('<HH', mem[30:34])
if ssz > 20: # allows for 2**20 bytes i.e. 1MB
print("WARNING: sector size (2**%d) is preposterous; assuming 512 and continuing ..." \
% ssz, file=logfile)
ssz = 9
if sssz > ssz:
print("WARNING: short stream sector size (2**%d) is preposterous; assuming 64 and continuing ..." \
% sssz, file=logfile)
sssz = 6
self.sec_size = sec_size = 1 << ssz
self.short_sec_size = 1 << sssz
(
SAT_tot_secs, self.dir_first_sec_sid, _unused, self.min_size_std_stream,
SSAT_first_sec_sid, SSAT_tot_secs,
MSAT_first_sec_sid, MSAT_tot_secs,
# ) = unpack('<ii4xiiiii', mem[44:76])
) = unpack('<iiiiiiii', mem[44:76])
mem_data_len = len(mem) - 512
mem_data_secs, left_over = divmod(mem_data_len, sec_size)
if left_over:
#### raise CompDocError("Not a whole number of sectors")
print("WARNING *** file size (%d) not 512 + multiple of sector size (%d)" \
% (len(mem), sec_size), file=logfile)
if DEBUG:
print('sec sizes', ssz, sssz, sec_size, self.short_sec_size, file=logfile)
print("mem data: %d bytes == %d sectors" % (mem_data_len, mem_data_secs), file=logfile)
print("SAT_tot_secs=%d, dir_first_sec_sid=%d, min_size_std_stream=%d" \
% (SAT_tot_secs, self.dir_first_sec_sid, self.min_size_std_stream,), file=logfile)
print("SSAT_first_sec_sid=%d, SSAT_tot_secs=%d" % (SSAT_first_sec_sid, SSAT_tot_secs,), file=logfile)
print("MSAT_first_sec_sid=%d, MSAT_tot_secs=%d" % (MSAT_first_sec_sid, MSAT_tot_secs,), file=logfile)
nent = int_floor_div(sec_size, 4) # number of SID entries in a sector
fmt = "<%di" % nent
trunc_warned = 0
#
# === build the MSAT ===
#
MSAT = list(unpack('<109i', mem[76:512]))
sid = MSAT_first_sec_sid
while sid >= 0:
if sid >= mem_data_secs:
raise CompDocError(
"MSAT extension: accessing sector %d but only %d in file" % (sid, mem_data_secs)
)
offset = 512 + sec_size * sid
news = list(unpack(fmt, mem[offset:offset+sec_size]))
sid = news.pop()
MSAT.extend(news)
if DEBUG:
print("MSAT: len =", len(MSAT), file=logfile)
print(MSAT, file=logfile)
#
# === build the SAT ===
#
self.SAT = []
for msid in MSAT:
if msid == FREESID: continue
if msid >= mem_data_secs:
if not trunc_warned:
print("WARNING *** File is truncated, or OLE2 MSAT is corrupt!!", file=logfile)
print("INFO: Trying to access sector %d but only %d available" \
% (msid, mem_data_secs), file=logfile)
trunc_warned = 1
continue
offset = 512 + sec_size * msid
news = list(unpack(fmt, mem[offset:offset+sec_size]))
self.SAT.extend(news)
if DEBUG:
print("SAT: len =", len(self.SAT), file=logfile)
print(self.SAT, file=logfile)
# print >> logfile, "SAT ",
# for i, s in enumerate(self.SAT):
# print >> logfile, "entry: %4d offset: %6d, next entry: %4d" % (i, 512 + sec_size * i, s)
# print >> logfile, "%d:%d " % (i, s),
print()
# === build the directory ===
#
dbytes = self._get_stream(
self.mem, 512, self.SAT, self.sec_size, self.dir_first_sec_sid,
name="directory")
dirlist = []
did = -1
for pos in range(0, len(dbytes), 128):
did += 1
dirlist.append(DirNode(did, dbytes[pos:pos+128], 0))
self.dirlist = dirlist
_build_family_tree(dirlist, 0, dirlist[0].root_DID) # and stand well back ...
if DEBUG:
for d in dirlist:
d.dump(DEBUG)
#
# === get the SSCS ===
#
sscs_dir = self.dirlist[0]
assert sscs_dir.etype == 5 # root entry
if sscs_dir.first_SID < 0 and sscs_dir.tot_size == 0:
# Problem reported by Frank Hoffsuemmer: some software was
# writing -1 instead of -2 (EOCSID) for the first_SID
# when the SCCS was empty. Not having EOCSID caused assertion
# failure in _get_stream.
# Solution: avoid calling _get_stream in any case when the
# SCSS appears to be empty.
self.SSCS = b""
else:
self.SSCS = self._get_stream(
self.mem, 512, self.SAT, sec_size, sscs_dir.first_SID,
sscs_dir.tot_size, name="SSCS")
# if DEBUG: print >> logfile, "SSCS", repr(self.SSCS)
#
# === build the SSAT ===
#
self.SSAT = []
if SSAT_tot_secs > 0 and sscs_dir.tot_size == 0:
print("WARNING *** OLE2 inconsistency: SSCS size is 0 but SSAT size is non-zero", file=logfile)
if sscs_dir.tot_size > 0:
sid = SSAT_first_sec_sid
nsecs = SSAT_tot_secs
while sid >= 0 and nsecs > 0:
nsecs -= 1
start_pos = 512 + sid * sec_size
news = list(unpack(fmt, mem[start_pos:start_pos+sec_size]))
self.SSAT.extend(news)
sid = self.SAT[sid]
# assert SSAT_tot_secs == 0 or sid == EOCSID
if DEBUG: print("SSAT last sid %d; remaining sectors %d" % (sid, nsecs), file=logfile)
assert nsecs == 0 and sid == EOCSID
if DEBUG: print("SSAT", self.SSAT, file=logfile)
def _get_stream(self, mem, base, sat, sec_size, start_sid, size=None, name=''):
# print >> self.logfile, "_get_stream", base, sec_size, start_sid, size
sectors = []
s = start_sid
if size is None:
# nothing to check against
while s >= 0:
start_pos = base + s * sec_size
sectors.append(mem[start_pos:start_pos+sec_size])
try:
s = sat[s]
except IndexError:
raise CompDocError(
"OLE2 stream %r: sector allocation table invalid entry (%d)" %
(name, s)
)
assert s == EOCSID
else:
todo = size
while s >= 0:
start_pos = base + s * sec_size
grab = sec_size
if grab > todo:
grab = todo
todo -= grab
sectors.append(mem[start_pos:start_pos+grab])
try:
s = sat[s]
except IndexError:
raise CompDocError(
"OLE2 stream %r: sector allocation table invalid entry (%d)" %
(name, s)
)
assert s == EOCSID
if todo != 0:
print("WARNING *** OLE2 stream %r: expected size %d, actual size %d" \
% (name, size, size - todo), file=self.logfile)
return b''.join(sectors)
def _dir_search(self, path, storage_DID=0):
# Return matching DirNode instance, or None
head = path[0]
tail = path[1:]
dl = self.dirlist
for child in dl[storage_DID].children:
if dl[child].name.lower() == head.lower():
et = dl[child].etype
if et == 2:
return dl[child]
if et == 1:
if not tail:
raise CompDocError("Requested component is a 'storage'")
return self._dir_search(tail, child)
dl[child].dump(1)
raise CompDocError("Requested stream is not a 'user stream'")
return None
##
# Interrogate the compound document's directory; return the stream as a string if found, otherwise
# return None.
# @param qname Name of the desired stream e.g. u'Workbook'. Should be in Unicode or convertible thereto.
def get_named_stream(self, qname):
d = self._dir_search(qname.split("/"))
if d is None:
return None
if d.tot_size >= self.min_size_std_stream:
return self._get_stream(
self.mem, 512, self.SAT, self.sec_size, d.first_SID,
d.tot_size, name=qname)
else:
return self._get_stream(
self.SSCS, 0, self.SSAT, self.short_sec_size, d.first_SID,
d.tot_size, name=qname + " (from SSCS)")
# Interrogate the compound document's directory.
# If the named stream is not found, (None, 0, 0) will be returned.
# If the named stream is found and is contiguous within the original byte sequence ("mem")
# used when the document was opened,
# then (mem, offset_to_start_of_stream, length_of_stream) is returned.
# Otherwise a new string is built from the fragments and (new_string, 0, length_of_stream) is returned.
# @param qname Name of the desired stream e.g. u'Workbook'. Should be in Unicode or convertible thereto.
def locate_named_stream(self, qname):
d = self._dir_search(qname.split("/"))
if d is None:
return (None, 0, 0)
if d.tot_size >= self.min_size_std_stream:
return self._locate_stream(self.mem, 512, self.SAT, self.sec_size, d.first_SID, d.tot_size)
else:
return (
self._get_stream(
self.SSCS, 0, self.SSAT, self.short_sec_size, d.first_SID,
d.tot_size, qname + " (from SSCS)"),
0,
d.tot_size
)
return (None, 0, 0) # not found
def _locate_stream(self, mem, base, sat, sec_size, start_sid, size):
# print >> self.logfile, "_locate_stream", base, sec_size, start_sid, size
s = start_sid
if s < 0:
raise CompDocError("_locate_stream: start_sid (%d) is -ve" % start_sid)
p = -99 # dummy previous SID
start_pos = -9999
end_pos = -8888
slices = []
while s >= 0:
if s == p+1:
# contiguous sectors
end_pos += sec_size
else:
# start new slice
if p >= 0:
# not first time
slices.append((start_pos, end_pos))
start_pos = base + s * sec_size
end_pos = start_pos + sec_size
p = s
s = sat[s]
assert s == EOCSID
# print >> self.logfile, len(slices) + 1, "slices"
if not slices:
# The stream is contiguous ... just what we like!
return (mem, start_pos, size)
slices.append((start_pos, end_pos))
return (b''.join([mem[start_pos:end_pos] for start_pos, end_pos in slices]), 0, size)
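The debugging helpers from biffh combine naturally with CompDoc: extract the workbook stream from the OLE2 container, then tally its BIFF records. A sketch, assuming the module layout this commit creates under tablib.packages.xlrd3:

import sys
from tablib.packages.xlrd3.compdoc import CompDoc
from tablib.packages.xlrd3.biffh import biff_count_records

def count_xls_records(path):
    """Print a tally of the BIFF record types found in an .xls file."""
    with open(path, 'rb') as f:
        mem = f.read()
    doc = CompDoc(mem)
    stream = doc.get_named_stream('Workbook')
    if stream is None:
        stream = doc.get_named_stream('Book')   # older BIFF5/BIFF7 workbooks
    biff_count_records(stream, 0, len(stream), fout=sys.stdout)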

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,276 @@
# Author: mozman <mozman@gmx.at>
# Purpose: xfcell -- cell with convenient xf function
# Created: 04.12.2010
# Copyright (C) 2010, Manfred Moitzi
# License: BSD-style licence
"""
The XFCell() object contains the data for one cell.
WARNING: You don't call this class yourself. You access Cell objects
via methods of the Sheet object(s) that you found in the Book object that
was returned when you called xlrd.open_workbook("myfile.xls").
Cell objects have four attributes: `ctype` is an int, `value` (which depends
on `ctype`), `xf_index` and `sheet`, a reference to the containing sheet. If
**formatting_info** is not enabled when the workbook is opened, xf_index will
be **None**.
The following table describes the types of cells and how their values
are represented in Python.
=============== ===== ============ ==========================================
Type symbol Const Python value Note
=============== ===== ============ ==========================================
XL_CELL_EMPTY 0 ""
XL_CELL_TEXT 1 str
XL_CELL_NUMBER 2 float
XL_CELL_DATE 3 float
XL_CELL_BOOLEAN 4 int 1 means TRUE, 0 means FALSE
XL_CELL_ERROR 5 int representing internal Excel codes; for a
text representation, refer to the supplied
dictionary error_text_from_code
XL_CELL_BLANK 6 "" this type will appear only when
open_workbook(..., formatting_info=True)
is used.
=============== ===== ============ ==========================================
"""
import datetime
from .xldate import xldate_as_tuple
from .biffh import XL_CELL_DATE, BaseObject
class XFCell(BaseObject):
""" Extended Cell() class with convenient methods for easy access of cell
properties.
"""
__slots__ = ['sheet', 'ctype', 'value', 'xf']
def __init__(self, ctype, value, xf_index=None, sheet=None):
self.sheet = sheet
self.ctype = ctype
self.value = value
if xf_index is not None:
self.xf = self.book.xf_list[xf_index]
else:
self.xf = None
@property
def book(self):
return self.sheet.book
@property
def has_xf(self):
return (self.xf is not None)
@property
def xf_index(self):
if self.has_xf:
return self.xf.xf_index
else:
return None
@property
def parent_style(self):
return self.book.xf_list[self.xf.parent_style_index]
@property
def is_datetime(self):
return self.ctype == XL_CELL_DATE
@property
def has_date(self):
if self.is_datetime:
return self.value > 1.
return False
def get_color(self, index):
return self.book.colour_map[index]
def datetime(self):
""" Returns a datetime.datetime object if cell type is XL_CELL_DATE
else raises a TypeError, and raises a ValueError if the cell has
no date value (only a time value is present).
"""
if self.is_datetime:
if self.has_date:
date = xldate_as_tuple(self.value, self.book.datemode)
return datetime.datetime(*date)
else:
raise ValueError("Cell has no date value.")
else:
raise TypeError("Cell is not a XL_CELL_DATE.")
def date(self):
""" Returns a datetime.date object if cell type is XL_CELL_DATE
else raises a **TypeError**. Raises **ValueError** if the cell
doesn't have a date value (only time value is present).
"""
dt = self.datetime()
return dt.date()
def time(self):
""" Returns a datetime.time object if cell type is XL_CELL_DATE else
raises a TypeError.
"""
if self.is_datetime:
date = xldate_as_tuple(self.value, self.book.datemode)
return datetime.time(date[3], date[4], date[5])
else:
raise TypeError("Cell is not a XL_CELL_DATE.")
#
# access the XFBackground() class
#
@property
def background(self):
if self.xf.is_style and \
self.xf._background_flag == 0:
return self.xf.background
elif self.xf._background_flag:
return self.xf.background
else:
return self.parent_style.background
def background_color(self):
""" Get cell background-color as 3-tuple. """
color_index = self.xf.background.background_colour_index
return self.get_color(color_index)
def fill_pattern(self):
return self.xf.background.fill_pattern
def pattern_color(self):
color_index = self.xf.background.pattern_colour_index
return self.get_color(color_index)
#
# access the Font() class
#
@property
def font_index(self):
if self.xf.is_style and \
self.xf._font_flag == 0:
return self.xf.font_index
elif self.xf._font_flag:
return self.xf.font_index
else:
return self.parent_style.font_index
@property
def font(self):
""" Get the Font() class. """
return self.book.font_list[self.xf.font_index]
def font_color(self):
""" Get cell foreground-color as 3-tuple. """
return self.get_color(self.font.colour_index)
#
# access the Format() class
#
@property
def format_key(self):
if self.xf.is_style and \
self.xf._format_flag == 0:
return self.xf.format_key
elif self.xf._format_flag:
return self.xf.format_key
else:
return self.parent_style.format_key
@property
def format(self):
""" Get the Format() class. """
return self.book.format_map[self.format_key]
def format_str(self):
""" Get the associated 'format_str'. """
return self.format.format_str
#
# access the XFAlignment() class
#
@property
def alignment(self):
if self.xf.is_style and \
self.xf._alignment_flag == 0:
return self.xf.alignment
elif self.xf._alignment_flag:
return self.xf.alignment
else:
return self.parent_style.alignment
#
# access the XFBorder() class
#
@property
def border(self):
if self.xf.is_style and \
self.xf._border_flag == 0:
return self.xf.border
elif self.xf._border_flag:
return self.xf.border
else:
return self.parent_style.border
def bordercolors(self):
""" Get border color as dict of rgb-color-tuples. """
border = self.border
return {
'top': self.get_color(border.top_colour_index),
'bottom': self.get_color(border.bottom_colour_index),
'left': self.get_color(border.left_colour_index),
'right': self.get_color(border.right_colour_index),
'diag': self.get_color(border.diag_colour_index),
}
def borderstyles(self):
""" Get border styles as dict of ints. """
border = self.border
return {
'top': border.top_line_style,
'bottom': border.bottom_line_style,
'left': border.left_line_style,
'right': border.right_line_style,
'diag': border.diag_line_style,
}
@property
def has_up_diag(self):
""" Draw a line across the cell from bottom left to top right. """
return bool(self.border.diag_up)
@property
def has_down_diag(self):
""" Draw a line across the cell from top left to bottom right. """
return bool(self.border.diag_down)
#
# access the XFProtection() class
#
@property
def protection(self):
if self.xf.is_style and \
self.xf._protection_flag == 0:
return self.xf.protection
elif self.xf._protection_flag:
return self.xf.protection
else:
return self.parent_style.protection
@property
def is_cell_locked(self):
return bool(self.protection.cell_locked)
@property
def is_formula_hidden(self):
return bool(self.protection.formula_hidden)
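A hedged sketch of pulling plain Python values out of XFCell objects; it assumes the workbook was opened with formatting_info enabled and that the xlrd3 Sheet hands back XFCell instances, and it uses only attributes defined in this file:

def cell_to_python(cell):
    """Map an XFCell to a datetime, a time, or its raw value."""
    if cell.is_datetime:
        return cell.datetime() if cell.has_date else cell.time()
    return cell.value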

View File

@@ -0,0 +1,84 @@
# Author: mozman <mozman@gmx.at>
# Purpose: xfconst -- xf constants
# Created: 05.12.2010
# Copyright (C) 2010, Manfred Moitzi
# License: BSD-style licence
# excelfileformat section 5.115.1 pg. 219
HOR_ALIGN_GENERAL = 0
HOR_ALIGN_LEFT = 1
HOR_ALIGN_CENTRED = 2
HOR_ALIGN_RIGHT = 3
HOR_ALIGN_FILLED = 4
HOR_ALIGN_JUSTIFIED = 5
HOR_ALIGN_CENTRED_ACROSS_SELECTION = 6
HOR_ALIGN_DISTRIBUTED = 7
# excelfileformat section 5.115.1 pg. 220
VERT_ALIGN_TOP = 0
VERT_ALIGN_CENTRED = 1
VERT_ALIGN_BOTTOM = 2
VERT_ALIGN_JUSTIFIED = 3
VERT_ALIGN_DISTRIBUTED = 4
# excelfileformat section 5.115.1 pg. 220
ORIENTATION_NONE = 0
ORIENTATION_STACKED = 1
ORIENTATION_90_COUNTERCLOCKWISE = 2
ORIENTATION_90_CLOCKWISE = 3
# excelfileformat section 5.115.1 pg. 220
ROTATION_NONE = 0
ROTATION_STACKED = 255
# other values:
# Value Description
# ======= =================================
# 1-90 1 to 90 degrees counterclockwise
# 91-180 1 to 90 degrees clockwise
# excelfileformat section 2.5.11 Line Styles for Cell Borders pg. 24
LS_NOLINE = 0
# solid line
LS_THIN = 1
# solid line
LS_MEDIUM = 2
# - - - - - - - - -
LS_DASHED = 3
# .................
LS_DOTTED = 4
# solid line
LS_THICK = 5
# =================
LS_DOUBLE = 6
# very thin dotted
LS_HAIR = 7
# - - - - - - - - -
LS_MEDIUM_DASHED = 8
# - . - . - . - . -
LS_THIN_DASH_DOTTED = 9
# - . - . - . - . -
LS_MEDIUM_DASH_DOTTED = 10
# - .. - .. - .. - .. -
LS_THIN_DASH_DOT_DOTTED = 11
# - .. - .. - .. - .. -
LS_MEDIUM_DASH_DOT_DOTTED = 12
# \\\ . \\\ . \\\ . \\\ . \\\
LS_SLANTED_MEDIUM_DASH_DOTTED = 13
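These LS_* codes are what XFCell.borderstyles() returns for each edge; the sketch below maps the numeric codes back to their constant names (the xfconst import path is an assumption about this package's layout):

from tablib.packages.xlrd3 import xfconst   # assumed module path

LINE_STYLE_NAMES = dict(
    (value, name) for name, value in vars(xfconst).items() if name.startswith('LS_'))

def named_border_styles(cell):
    """Translate the codes from XFCell.borderstyles() into readable LS_* names."""
    return dict((edge, LINE_STYLE_NAMES.get(code, 'UNKNOWN_%d' % code))
                for edge, code in cell.borderstyles().items())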

View File

@@ -0,0 +1,167 @@
# No part of the content of this file was derived from the works of David Giffin.
#
# Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd
# This module is part of the xlrd3 package, which is released under a
# BSD-style licence.
#
# Provides function(s) for dealing with Microsoft Excel ™ dates.
#
# 2008-10-18 SJM Fix bug in xldate_from_date_tuple (affected some years after 2099)
#
# The conversion from days to (year, month, day) starts with
# an integral "julian day number" aka JDN.
# FWIW, JDN 0 corresponds to noon on Monday November 24 in Gregorian year -4713.
# More importantly:
# Noon on Gregorian 1900-03-01 (day 61 in the 1900-based system) is JDN 2415080.0
# Noon on Gregorian 1904-01-02 (day 1 in the 1904-based system) is JDN 2416482.0
def ifd(x, y):
return divmod(x, y)[0]
_JDN_delta = (2415080 - 61, 2416482 - 1)
assert _JDN_delta[1] - _JDN_delta[0] == 1462
class XLDateError(ValueError): pass
class XLDateNegative(XLDateError): pass
class XLDateAmbiguous(XLDateError): pass
class XLDateTooLarge(XLDateError): pass
class XLDateBadDatemode(XLDateError): pass
class XLDateBadTuple(XLDateError): pass
_XLDAYS_TOO_LARGE = (2958466, 2958466 - 1462) # This is equivalent to 10000-01-01
# Convert an Excel number (presumed to represent a date, a datetime or a time) into
# a tuple suitable for feeding to datetime or mx.DateTime constructors.
# @param xldate The Excel number
# @param datemode 0: 1900-based, 1: 1904-based.
# <br>WARNING: when using this function to
# interpret the contents of a workbook, you should pass in the Book.datemode
# attribute of that workbook. Whether
# the workbook has ever been anywhere near a Macintosh is irrelevant.
# @return Gregorian (year, month, day, hour, minute, nearest_second).
# <br>Special case: if 0.0 <= xldate < 1.0, it is assumed to represent a time;
# (0, 0, 0, hour, minute, second) will be returned.
# <br>Note: 1904-01-01 is not regarded as a valid date in the datemode 1 system; its "serial number"
# is zero.
# @throws XLDateNegative xldate < 0.00
# @throws XLDateAmbiguous The 1900 leap-year problem (datemode == 0 and 1.0 <= xldate < 61.0)
# @throws XLDateTooLarge Gregorian year 10000 or later
# @throws XLDateBadDatemode datemode arg is neither 0 nor 1
# @throws XLDateError Covers the 4 specific errors
def xldate_as_tuple(xldate, datemode):
if datemode not in (0, 1):
raise XLDateBadDatemode(datemode)
if xldate == 0.00:
return (0, 0, 0, 0, 0, 0)
if xldate < 0.00:
raise XLDateNegative(xldate)
xldays = int(xldate)
frac = xldate - xldays
seconds = int(round(frac * 86400.0))
assert 0 <= seconds <= 86400
if seconds == 86400:
hour = minute = second = 0
xldays += 1
else:
# second = seconds % 60; minutes = seconds // 60
minutes, second = divmod(seconds, 60)
# minute = minutes % 60; hour = minutes // 60
hour, minute = divmod(minutes, 60)
if xldays >= _XLDAYS_TOO_LARGE[datemode]:
raise XLDateTooLarge(xldate)
if xldays == 0:
return (0, 0, 0, hour, minute, second)
if xldays < 61 and datemode == 0:
raise XLDateAmbiguous(xldate)
jdn = xldays + _JDN_delta[datemode]
yreg = (ifd(ifd(jdn * 4 + 274277, 146097) * 3, 4) + jdn + 1363) * 4 + 3
mp = ifd(yreg % 1461, 4) * 535 + 333
d = ifd(mp % 16384, 535) + 1
# mp /= 16384
mp >>= 14
if mp >= 10:
return (ifd(yreg, 1461) - 4715, mp - 9, d, hour, minute, second)
else:
return (ifd(yreg, 1461) - 4716, mp + 3, d, hour, minute, second)
# === conversions from date/time to xl numbers
def _leap(y):
if y % 4: return 0
if y % 100: return 1
if y % 400: return 0
return 1
_days_in_month = (None, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
# Convert a date tuple (year, month, day) to an Excel date.
# @param year Gregorian year.
# @param month 1 <= month <= 12
# @param day 1 <= day <= last day of that (year, month)
# @param datemode 0: 1900-based, 1: 1904-based.
# @throws XLDateAmbiguous The 1900 leap-year problem (datemode == 0 and 1.0 <= xldate < 61.0)
# @throws XLDateBadDatemode datemode arg is neither 0 nor 1
# @throws XLDateBadTuple (year, month, day) is too early/late or has invalid component(s)
# @throws XLDateError Covers the specific errors
def xldate_from_date_tuple(datetuple, datemode):
(year, month, day) = datetuple
if datemode not in (0, 1):
raise XLDateBadDatemode(datemode)
if year == 0 and month == 0 and day == 0:
return 0.00
if not (1900 <= year <= 9999):
raise XLDateBadTuple("Invalid year: %r" % ((year, month, day),))
if not (1 <= month <= 12):
raise XLDateBadTuple("Invalid month: %r" % ((year, month, day),))
if day < 1 \
or (day > _days_in_month[month] and not(day == 29 and month == 2 and _leap(year))):
raise XLDateBadTuple("Invalid day: %r" % ((year, month, day),))
Yp = year + 4716
M = month
if M <= 2:
Yp = Yp - 1
Mp = M + 9
else:
Mp = M - 3
jdn = ifd(1461 * Yp, 4) + ifd(979 * Mp + 16, 32) + \
day - 1364 - ifd(ifd(Yp + 184, 100) * 3, 4)
xldays = jdn - _JDN_delta[datemode]
if xldays <= 0:
raise XLDateBadTuple("Invalid (year, month, day): %r" % ((year, month, day),))
if xldays < 61 and datemode == 0:
raise XLDateAmbiguous("Before 1900-03-01: %r" % ((year, month, day),))
return float(xldays)
# Convert a time tuple (hour, minute, second) to an Excel "date" value (fraction of a day).
# @param hour 0 <= hour < 24
# @param minute 0 <= minute < 60
# @param second 0 <= second < 60
# @throws XLDateBadTuple Out-of-range hour, minute, or second
def xldate_from_time_tuple(timetuple):
(hour, minute, second) = timetuple
if 0 <= hour < 24 and 0 <= minute < 60 and 0 <= second < 60:
return ((second / 60.0 + minute) / 60.0 + hour) / 24.0
raise XLDateBadTuple("Invalid (hour, minute, second): %r" % ((hour, minute, second),))
# Convert a datetime tuple (year, month, day, hour, minute, second) to an Excel date value.
# For more details, refer to other xldate_from_*_tuple functions.
# @param datetime_tuple (year, month, day, hour, minute, second)
# @param datemode 0: 1900-based, 1: 1904-based.
def xldate_from_datetime_tuple(datetime_tuple, datemode):
return (
xldate_from_date_tuple(datetime_tuple[:3], datemode)
+
xldate_from_time_tuple(datetime_tuple[3:])
)
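A round-trip sketch for the conversions above; the serial number is computed here rather than read from a real workbook, datemode 0 (1900-based) is assumed, and the xldate import path is an assumption about this package's layout:

import datetime
from tablib.packages.xlrd3.xldate import xldate_as_tuple, xldate_from_datetime_tuple

serial = xldate_from_datetime_tuple((2012, 5, 15, 7, 19, 15), 0)
parts = xldate_as_tuple(serial, 0)
assert datetime.datetime(*parts) == datetime.datetime(2012, 5, 15, 7, 19, 15)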

View File

@@ -657,7 +657,7 @@ class TablibTestCase(unittest.TestCase):
self.founders.append(('Old', 'Man', 100500))
self.assertEquals(
u"""
"""
first_name|last_name |gpa
----------|----------|------
John |Adams |90