Changes for Python 3 compatibility, including vendorizing xlrd3
This commit is contained in:
parent
6a825a8a39
commit
a3781e3c89
7
setup.py
7
setup.py
|
@ -49,13 +49,20 @@ setup(
|
|||
'tablib', 'tablib.formats',
|
||||
'tablib.packages',
|
||||
'tablib.packages.xlwt',
|
||||
'tablib.packages.xlwt3',
|
||||
'tablib.packages.xlrd',
|
||||
'tablib.packages.xlrd3',
|
||||
'tablib.packages.omnijson',
|
||||
'tablib.packages.odf',
|
||||
'tablib.packages.odf3',
|
||||
'tablib.packages.openpyxl',
|
||||
'tablib.packages.openpyxl.shared',
|
||||
'tablib.packages.openpyxl.reader',
|
||||
'tablib.packages.openpyxl.writer',
|
||||
'tablib.packages.openpyxl3',
|
||||
'tablib.packages.openpyxl3.shared',
|
||||
'tablib.packages.openpyxl3.reader',
|
||||
'tablib.packages.openpyxl3.writer',
|
||||
'tablib.packages.yaml',
|
||||
'tablib.packages.unicodecsv'
|
||||
],
|
||||
|
|
|
@ -23,6 +23,8 @@ except ImportError:
|
|||
if is_py3:
|
||||
from io import BytesIO
|
||||
import tablib.packages.xlwt3 as xlwt
|
||||
import tablib.packages.xlrd3 as xlrd
|
||||
from tablib.packages.xlrd3.biffh import XLRDError
|
||||
from tablib.packages import markup3 as markup
|
||||
from tablib.packages import openpyxl3 as openpyxl
|
||||
from tablib.packages.odf3 import opendocument, style, text, table
|
||||
|
@ -40,6 +42,7 @@ else:
|
|||
from cStringIO import StringIO
|
||||
import tablib.packages.xlwt as xlwt
|
||||
import tablib.packages.xlrd as xlrd
|
||||
from tablib.packages.xlrd.biffh import XLRDError
|
||||
from tablib.packages import markup
|
||||
from itertools import ifilter
|
||||
from tablib.packages import openpyxl
|
||||
|
@ -47,4 +50,4 @@ else:
|
|||
|
||||
from tablib.packages import unicodecsv as csv
|
||||
|
||||
unicode = unicode
|
||||
unicode = unicode
|
||||
|
|
|
@ -226,19 +226,21 @@ class Dataset(object):
|
|||
def __unicode__(self):
|
||||
result = [self.__headers]
|
||||
|
||||
result.extend(map(unicode, row) for row in self._data)
|
||||
result.extend(list(map(unicode, row)) for row in self._data)
|
||||
|
||||
# here, we calculate max width for each column
|
||||
lens = (map(len, row) for row in result)
|
||||
field_lens = map(max, zip(*lens))
|
||||
lens = (list(map(len, row)) for row in result)
|
||||
field_lens = list(map(max, zip(*lens)))
|
||||
|
||||
# delimiter between header and data
|
||||
result.insert(1, [u'-' * length for length in field_lens])
|
||||
result.insert(1, ['-' * length for length in field_lens])
|
||||
|
||||
format_string = u'|'.join(u'{%s:%s}' % item for item in enumerate(field_lens))
|
||||
format_string = '|'.join('{%s:%s}' % item for item in enumerate(field_lens))
|
||||
|
||||
return u'\n'.join(format_string.format(*row) for row in result)
|
||||
return '\n'.join(format_string.format(*row) for row in result)
|
||||
|
||||
def __str__(self):
|
||||
return self.__unicode__()
|
||||
|
||||
|
||||
# ---------
|
||||
|
|
|
@ -5,9 +5,7 @@
|
|||
|
||||
import sys
|
||||
|
||||
from tablib.compat import BytesIO, xlwt
|
||||
from tablib.packages import xlrd
|
||||
from tablib.packages.xlrd.biffh import XLRDError
|
||||
from tablib.compat import BytesIO, xlwt, xlrd, XLRDError
|
||||
import tablib
|
||||
|
||||
title = 'xls'
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,620 @@
|
|||
# Support module for the xlrd3 package.
|
||||
#
|
||||
# Portions copyright (c) 2005-2008 Stephen John Machin, Lingfo Pty Ltd
|
||||
# This module is part of the xlrd package, which is released under a
|
||||
# BSD-style licence.
|
||||
#
|
||||
# 2010-12-08 mozman refactoring for python 3
|
||||
# 2008-02-10 SJM BIFF2 BLANK record
|
||||
# 2008-02-08 SJM Preparation for Excel 2.0 support
|
||||
# 2008-02-02 SJM Added suffixes (_B2, _B2_ONLY, etc) on record names for
|
||||
# biff_dump & biff_count
|
||||
# 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files.
|
||||
# 2007-09-08 SJM Avoid crash when zero-length Unicode string missing options byte.
|
||||
# 2007-04-22 SJM Remove experimental "trimming" facility.
|
||||
|
||||
import sys
|
||||
from struct import unpack
|
||||
|
||||
encoding_from_codepage = {
|
||||
1200 : 'utf_16_le',
|
||||
10000: 'mac_roman',
|
||||
10006: 'mac_greek', # guess
|
||||
10007: 'mac_cyrillic', # guess
|
||||
10029: 'mac_latin2', # guess
|
||||
10079: 'mac_iceland', # guess
|
||||
10081: 'mac_turkish', # guess
|
||||
32768: 'mac_roman',
|
||||
32769: 'cp1252',
|
||||
}
|
||||
|
||||
# some more guessing, for Indic scripts
|
||||
# codepage 57000 range:
|
||||
# 2 Devanagari [0]
|
||||
# 3 Bengali [1]
|
||||
# 4 Tamil [5]
|
||||
# 5 Telegu [6]
|
||||
# 6 Assamese [1] c.f. Bengali
|
||||
# 7 Oriya [4]
|
||||
# 8 Kannada [7]
|
||||
# 9 Malayalam [8]
|
||||
# 10 Gujarati [3]
|
||||
# 11 Gurmukhi [2]
|
||||
|
||||
FUN = 0 # unknown
|
||||
FDT = 1 # date
|
||||
FNU = 2 # number
|
||||
FGE = 3 # general
|
||||
FTX = 4 # text
|
||||
|
||||
DATEFORMAT = FDT
|
||||
NUMBERFORMAT = FNU
|
||||
|
||||
XL_CELL_EMPTY = 0
|
||||
XL_CELL_TEXT = 1
|
||||
XL_CELL_NUMBER = 2
|
||||
XL_CELL_DATE = 3
|
||||
XL_CELL_BOOLEAN = 4
|
||||
XL_CELL_ERROR = 5
|
||||
XL_CELL_BLANK = 6 # for use in debugging, gathering stats, etc
|
||||
|
||||
biff_text_from_num = {
|
||||
0: "(not BIFF)",
|
||||
20: "2.0",
|
||||
21: "2.1",
|
||||
30: "3",
|
||||
40: "4S",
|
||||
45: "4W",
|
||||
50: "5",
|
||||
70: "7",
|
||||
80: "8",
|
||||
85: "8X",
|
||||
}
|
||||
|
||||
# This dictionary can be used to produce a text version of the internal codes
|
||||
# that Excel uses for error cells. Here are its contents:
|
||||
error_text_from_code = {
|
||||
0x00: '#NULL!', # Intersection of two cell ranges is empty
|
||||
0x07: '#DIV/0!', # Division by zero
|
||||
0x0F: '#VALUE!', # Wrong type of operand
|
||||
0x17: '#REF!', # Illegal or deleted cell reference
|
||||
0x1D: '#NAME?', # Wrong function or range name
|
||||
0x24: '#NUM!', # Value range overflow
|
||||
0x2A: '#N/A!', # Argument or function not available
|
||||
}
|
||||
|
||||
BIFF_FIRST_UNICODE = 80
|
||||
|
||||
XL_WORKBOOK_GLOBALS = WBKBLOBAL = 0x5
|
||||
XL_WORKBOOK_GLOBALS_4W = 0x100
|
||||
XL_WORKSHEET = WRKSHEET = 0x10
|
||||
|
||||
XL_BOUNDSHEET_WORKSHEET = 0x00
|
||||
XL_BOUNDSHEET_CHART = 0x02
|
||||
XL_BOUNDSHEET_VB_MODULE = 0x06
|
||||
|
||||
# XL_RK2 = 0x7e
|
||||
XL_ARRAY = 0x0221
|
||||
XL_ARRAY2 = 0x0021
|
||||
XL_BLANK = 0x0201
|
||||
XL_BLANK_B2 = 0x01
|
||||
XL_BOF = 0x809
|
||||
XL_BOOLERR = 0x205
|
||||
XL_BOOLERR_B2 = 0x5
|
||||
XL_BOUNDSHEET = 0x85
|
||||
XL_BUILTINFMTCOUNT = 0x56
|
||||
XL_CF = 0x01B1
|
||||
XL_CODEPAGE = 0x42
|
||||
XL_COLINFO = 0x7D
|
||||
XL_COLUMNDEFAULT = 0x20 # BIFF2 only
|
||||
XL_COLWIDTH = 0x24 # BIFF2 only
|
||||
XL_CONDFMT = 0x01B0
|
||||
XL_CONTINUE = 0x3c
|
||||
XL_COUNTRY = 0x8C
|
||||
XL_DATEMODE = 0x22
|
||||
XL_DEFAULTROWHEIGHT = 0x0225
|
||||
XL_DEFCOLWIDTH = 0x55
|
||||
XL_DIMENSION = 0x200
|
||||
XL_DIMENSION2 = 0x0
|
||||
XL_EFONT = 0x45
|
||||
XL_EOF = 0x0a
|
||||
XL_EXTERNNAME = 0x23
|
||||
XL_EXTERNSHEET = 0x17
|
||||
XL_EXTSST = 0xff
|
||||
XL_FEAT11 = 0x872
|
||||
XL_FILEPASS = 0x2f
|
||||
XL_FONT = 0x31
|
||||
XL_FONT_B3B4 = 0x231
|
||||
XL_FORMAT = 0x41e
|
||||
XL_FORMAT2 = 0x1E # BIFF2, BIFF3
|
||||
XL_FORMULA = 0x6
|
||||
XL_FORMULA3 = 0x206
|
||||
XL_FORMULA4 = 0x406
|
||||
XL_GCW = 0xab
|
||||
XL_INDEX = 0x20b
|
||||
XL_INTEGER = 0x2 # BIFF2 only
|
||||
XL_IXFE = 0x44 # BIFF2 only
|
||||
XL_LABEL = 0x204
|
||||
XL_LABEL_B2 = 0x04
|
||||
XL_LABELRANGES = 0x15f
|
||||
XL_LABELSST = 0xfd
|
||||
XL_MERGEDCELLS = 0xE5
|
||||
XL_MSO_DRAWING = 0x00EC
|
||||
XL_MSO_DRAWING_GROUP = 0x00EB
|
||||
XL_MSO_DRAWING_SELECTION = 0x00ED
|
||||
XL_MULRK = 0xbd
|
||||
XL_MULBLANK = 0xbe
|
||||
XL_NAME = 0x18
|
||||
XL_NOTE = 0x1c
|
||||
XL_NUMBER = 0x203
|
||||
XL_NUMBER_B2 = 0x3
|
||||
XL_OBJ = 0x5D
|
||||
XL_PALETTE = 0x92
|
||||
XL_RK = 0x27e
|
||||
XL_ROW = 0x208
|
||||
XL_ROW_B2 = 0x08
|
||||
XL_RSTRING = 0xd6
|
||||
XL_SHEETHDR = 0x8F # BIFF4W only
|
||||
XL_SHEETSOFFSET = 0x8E # BIFF4W only
|
||||
XL_SHRFMLA = 0x04bc
|
||||
XL_SST = 0xfc
|
||||
XL_STANDARDWIDTH = 0x99
|
||||
XL_STRING = 0x207
|
||||
XL_STRING_B2 = 0x7
|
||||
XL_STYLE = 0x293
|
||||
XL_SUPBOOK = 0x1AE
|
||||
XL_TABLEOP = 0x236
|
||||
XL_TABLEOP2 = 0x37
|
||||
XL_TABLEOP_B2 = 0x36
|
||||
XL_TXO = 0x1b6
|
||||
XL_UNCALCED = 0x5e
|
||||
XL_UNKNOWN = 0xffff
|
||||
XL_WINDOW2 = 0x023E
|
||||
XL_WRITEACCESS = 0x5C
|
||||
XL_XF = 0xe0
|
||||
XL_XF2 = 0x0043 # BIFF2 version of XF record
|
||||
XL_XF3 = 0x0243 # BIFF3 version of XF record
|
||||
XL_XF4 = 0x0443 # BIFF4 version of XF record
|
||||
|
||||
boflen = {
|
||||
0x0809: 8,
|
||||
0x0409: 6,
|
||||
0x0209: 6,
|
||||
0x0009: 4,
|
||||
}
|
||||
|
||||
bofcodes = (0x0809, 0x0409, 0x0209, 0x0009)
|
||||
|
||||
XL_FORMULA_OPCODES = (0x0006, 0x0406, 0x0206)
|
||||
|
||||
_cell_opcode_list = (
|
||||
XL_BOOLERR,
|
||||
XL_FORMULA,
|
||||
XL_FORMULA3,
|
||||
XL_FORMULA4,
|
||||
XL_LABEL,
|
||||
XL_LABELSST,
|
||||
XL_MULRK,
|
||||
XL_NUMBER,
|
||||
XL_RK,
|
||||
XL_RSTRING,
|
||||
)
|
||||
|
||||
biff_rec_name_dict = {
|
||||
0x0000: 'DIMENSIONS_B2',
|
||||
0x0001: 'BLANK_B2',
|
||||
0x0002: 'INTEGER_B2_ONLY',
|
||||
0x0003: 'NUMBER_B2',
|
||||
0x0004: 'LABEL_B2',
|
||||
0x0005: 'BOOLERR_B2',
|
||||
0x0006: 'FORMULA',
|
||||
0x0007: 'STRING_B2',
|
||||
0x0008: 'ROW_B2',
|
||||
0x0009: 'BOF_B2',
|
||||
0x000A: 'EOF',
|
||||
0x000B: 'INDEX_B2_ONLY',
|
||||
0x000C: 'CALCCOUNT',
|
||||
0x000D: 'CALCMODE',
|
||||
0x000E: 'PRECISION',
|
||||
0x000F: 'REFMODE',
|
||||
0x0010: 'DELTA',
|
||||
0x0011: 'ITERATION',
|
||||
0x0012: 'PROTECT',
|
||||
0x0013: 'PASSWORD',
|
||||
0x0014: 'HEADER',
|
||||
0x0015: 'FOOTER',
|
||||
0x0016: 'EXTERNCOUNT',
|
||||
0x0017: 'EXTERNSHEET',
|
||||
0x0018: 'NAME_B2,5+',
|
||||
0x0019: 'WINDOWPROTECT',
|
||||
0x001A: 'VERTICALPAGEBREAKS',
|
||||
0x001B: 'HORIZONTALPAGEBREAKS',
|
||||
0x001C: 'NOTE',
|
||||
0x001D: 'SELECTION',
|
||||
0x001E: 'FORMAT_B2-3',
|
||||
0x001F: 'BUILTINFMTCOUNT_B2',
|
||||
0x0020: 'COLUMNDEFAULT_B2_ONLY',
|
||||
0x0021: 'ARRAY_B2_ONLY',
|
||||
0x0022: 'DATEMODE',
|
||||
0x0023: 'EXTERNNAME',
|
||||
0x0024: 'COLWIDTH_B2_ONLY',
|
||||
0x0025: 'DEFAULTROWHEIGHT_B2_ONLY',
|
||||
0x0026: 'LEFTMARGIN',
|
||||
0x0027: 'RIGHTMARGIN',
|
||||
0x0028: 'TOPMARGIN',
|
||||
0x0029: 'BOTTOMMARGIN',
|
||||
0x002A: 'PRINTHEADERS',
|
||||
0x002B: 'PRINTGRIDLINES',
|
||||
0x002F: 'FILEPASS',
|
||||
0x0031: 'FONT',
|
||||
0x0032: 'FONT2_B2_ONLY',
|
||||
0x0036: 'TABLEOP_B2',
|
||||
0x0037: 'TABLEOP2_B2',
|
||||
0x003C: 'CONTINUE',
|
||||
0x003D: 'WINDOW1',
|
||||
0x003E: 'WINDOW2_B2',
|
||||
0x0040: 'BACKUP',
|
||||
0x0041: 'PANE',
|
||||
0x0042: 'CODEPAGE',
|
||||
0x0043: 'XF_B2',
|
||||
0x0044: 'IXFE_B2_ONLY',
|
||||
0x0045: 'EFONT_B2_ONLY',
|
||||
0x004D: 'PLS',
|
||||
0x0051: 'DCONREF',
|
||||
0x0055: 'DEFCOLWIDTH',
|
||||
0x0056: 'BUILTINFMTCOUNT_B3-4',
|
||||
0x0059: 'XCT',
|
||||
0x005A: 'CRN',
|
||||
0x005B: 'FILESHARING',
|
||||
0x005C: 'WRITEACCESS',
|
||||
0x005D: 'OBJECT',
|
||||
0x005E: 'UNCALCED',
|
||||
0x005F: 'SAVERECALC',
|
||||
0x0063: 'OBJECTPROTECT',
|
||||
0x007D: 'COLINFO',
|
||||
0x007E: 'RK2_mythical_?',
|
||||
0x0080: 'GUTS',
|
||||
0x0081: 'WSBOOL',
|
||||
0x0082: 'GRIDSET',
|
||||
0x0083: 'HCENTER',
|
||||
0x0084: 'VCENTER',
|
||||
0x0085: 'BOUNDSHEET',
|
||||
0x0086: 'WRITEPROT',
|
||||
0x008C: 'COUNTRY',
|
||||
0x008D: 'HIDEOBJ',
|
||||
0x008E: 'SHEETSOFFSET',
|
||||
0x008F: 'SHEETHDR',
|
||||
0x0090: 'SORT',
|
||||
0x0092: 'PALETTE',
|
||||
0x0099: 'STANDARDWIDTH',
|
||||
0x009B: 'FILTERMODE',
|
||||
0x009C: 'FNGROUPCOUNT',
|
||||
0x009D: 'AUTOFILTERINFO',
|
||||
0x009E: 'AUTOFILTER',
|
||||
0x00A0: 'SCL',
|
||||
0x00A1: 'SETUP',
|
||||
0x00AB: 'GCW',
|
||||
0x00BD: 'MULRK',
|
||||
0x00BE: 'MULBLANK',
|
||||
0x00C1: 'MMS',
|
||||
0x00D6: 'RSTRING',
|
||||
0x00D7: 'DBCELL',
|
||||
0x00DA: 'BOOKBOOL',
|
||||
0x00DD: 'SCENPROTECT',
|
||||
0x00E0: 'XF',
|
||||
0x00E1: 'INTERFACEHDR',
|
||||
0x00E2: 'INTERFACEEND',
|
||||
0x00E5: 'MERGEDCELLS',
|
||||
0x00E9: 'BITMAP',
|
||||
0x00EB: 'MSO_DRAWING_GROUP',
|
||||
0x00EC: 'MSO_DRAWING',
|
||||
0x00ED: 'MSO_DRAWING_SELECTION',
|
||||
0x00EF: 'PHONETIC',
|
||||
0x00FC: 'SST',
|
||||
0x00FD: 'LABELSST',
|
||||
0x00FF: 'EXTSST',
|
||||
0x013D: 'TABID',
|
||||
0x015F: 'LABELRANGES',
|
||||
0x0160: 'USESELFS',
|
||||
0x0161: 'DSF',
|
||||
0x01AE: 'SUPBOOK',
|
||||
0x01AF: 'PROTECTIONREV4',
|
||||
0x01B0: 'CONDFMT',
|
||||
0x01B1: 'CF',
|
||||
0x01B2: 'DVAL',
|
||||
0x01B6: 'TXO',
|
||||
0x01B7: 'REFRESHALL',
|
||||
0x01B8: 'HLINK',
|
||||
0x01BC: 'PASSWORDREV4',
|
||||
0x01BE: 'DV',
|
||||
0x01C0: 'XL9FILE',
|
||||
0x01C1: 'RECALCID',
|
||||
0x0200: 'DIMENSIONS',
|
||||
0x0201: 'BLANK',
|
||||
0x0203: 'NUMBER',
|
||||
0x0204: 'LABEL',
|
||||
0x0205: 'BOOLERR',
|
||||
0x0206: 'FORMULA_B3',
|
||||
0x0207: 'STRING',
|
||||
0x0208: 'ROW',
|
||||
0x0209: 'BOF',
|
||||
0x020B: 'INDEX_B3+',
|
||||
0x0218: 'NAME',
|
||||
0x0221: 'ARRAY',
|
||||
0x0223: 'EXTERNNAME_B3-4',
|
||||
0x0225: 'DEFAULTROWHEIGHT',
|
||||
0x0231: 'FONT_B3B4',
|
||||
0x0236: 'TABLEOP',
|
||||
0x023E: 'WINDOW2',
|
||||
0x0243: 'XF_B3',
|
||||
0x027E: 'RK',
|
||||
0x0293: 'STYLE',
|
||||
0x0406: 'FORMULA_B4',
|
||||
0x0409: 'BOF',
|
||||
0x041E: 'FORMAT',
|
||||
0x0443: 'XF_B4',
|
||||
0x04BC: 'SHRFMLA',
|
||||
0x0800: 'QUICKTIP',
|
||||
0x0809: 'BOF',
|
||||
0x0862: 'SHEETLAYOUT',
|
||||
0x0867: 'SHEETPROTECTION',
|
||||
0x0868: 'RANGEPROTECTION',
|
||||
}
|
||||
|
||||
class XLRDError(Exception):
|
||||
pass
|
||||
|
||||
class BaseObject:
|
||||
"""
|
||||
Parent of almost all other classes in the package. Defines a common
|
||||
'dump' method for debugging.
|
||||
"""
|
||||
_repr_these = []
|
||||
|
||||
def dump(self, f=None, header=None, footer=None, indent=0):
|
||||
"""
|
||||
:param f: open file object, to which the dump is written
|
||||
:param header: text to write before the dump
|
||||
:param footer: text to write after the dump
|
||||
:param indent: number of leading spaces (for recursive calls)
|
||||
"""
|
||||
if f is None:
|
||||
f = sys.stderr
|
||||
pad = " " * indent
|
||||
|
||||
if header is not None:
|
||||
print(header, file=f)
|
||||
|
||||
for attr, value in sorted(self.__dict__.items()):
|
||||
if getattr(value, 'dump', None) and attr != 'book':
|
||||
value.dump(f,
|
||||
header="%s%s (%s object):" % (pad, attr, value.__class__.__name__),
|
||||
indent=indent+4)
|
||||
elif attr not in self._repr_these and \
|
||||
(isinstance(value, list) or
|
||||
isinstance(value, dict)):
|
||||
print("%s%s: %s, len = %d" % (pad, attr, type(value), len(value)), file=f)
|
||||
else:
|
||||
print("%s%s: %r" % (pad, attr, value), file=f)
|
||||
if footer is not None:
|
||||
print(footer, file=f)
|
||||
|
||||
def fprintf(f, fmt, *vargs):
|
||||
print(fmt.rstrip('\n') % vargs, file=f)
|
||||
|
||||
def upkbits(tgt_obj, src, manifest, local_setattr=setattr):
|
||||
for n, mask, attr in manifest:
|
||||
local_setattr(tgt_obj, attr, (src & mask) >> n)
|
||||
|
||||
def upkbitsL(tgt_obj, src, manifest, local_setattr=setattr, local_int=int):
|
||||
for n, mask, attr in manifest:
|
||||
local_setattr(tgt_obj, attr, local_int((src & mask) >> n))
|
||||
|
||||
def unpack_string(data, pos, encoding, lenlen=1):
|
||||
nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
|
||||
pos += lenlen
|
||||
return str(data[pos:pos+nchars], encoding)
|
||||
|
||||
def unpack_string_update_pos(data, pos, encoding, lenlen=1, known_len=None):
|
||||
if known_len is not None:
|
||||
# On a NAME record, the length byte is detached from the front of the string.
|
||||
nchars = known_len
|
||||
else:
|
||||
nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
|
||||
pos += lenlen
|
||||
|
||||
newpos = pos + nchars
|
||||
return (str(data[pos:newpos], encoding), newpos)
|
||||
|
||||
def unpack_unicode(data, pos, lenlen=2):
|
||||
""" Return unicode_strg """
|
||||
nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
|
||||
if not nchars:
|
||||
# Ambiguous whether 0-length string should have an "options" byte.
|
||||
# Avoid crash if missing.
|
||||
return ""
|
||||
pos += lenlen
|
||||
options = data[pos]
|
||||
pos += 1
|
||||
|
||||
if options & 0x08: # richtext
|
||||
pos += 2
|
||||
|
||||
if options & 0x04: # phonetic
|
||||
pos += 4
|
||||
|
||||
if options & 0x01:
|
||||
# Uncompressed UTF-16-LE
|
||||
rawstrg = data[pos:pos+2*nchars]
|
||||
strg = str(rawstrg, 'utf_16_le')
|
||||
else:
|
||||
# Note: this is COMPRESSED (not ASCII!) encoding!!!
|
||||
# Merely returning the raw bytes would work OK 99.99% of the time
|
||||
# if the local codepage was cp1252 -- however this would rapidly go pear-shaped
|
||||
# for other codepages so we grit our Anglocentric teeth and return Unicode :-)
|
||||
strg = str(data[pos:pos+nchars], "latin_1")
|
||||
return strg
|
||||
|
||||
def unpack_unicode_update_pos(data, pos, lenlen=2, known_len=None):
|
||||
""" Return (unicode_strg, updated value of pos) """
|
||||
if known_len is not None:
|
||||
# On a NAME record, the length byte is detached from the front of the string.
|
||||
nchars = known_len
|
||||
else:
|
||||
nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
|
||||
pos += lenlen
|
||||
|
||||
if not nchars and not data[pos:]:
|
||||
# Zero-length string with no options byte
|
||||
return ("", pos)
|
||||
|
||||
options = data[pos]
|
||||
pos += 1
|
||||
phonetic = options & 0x04
|
||||
richtext = options & 0x08
|
||||
|
||||
if richtext:
|
||||
rt = unpack('<H', data[pos:pos+2])[0]
|
||||
pos += 2
|
||||
|
||||
if phonetic:
|
||||
sz = unpack('<i', data[pos:pos+4])[0]
|
||||
pos += 4
|
||||
|
||||
if options & 0x01:
|
||||
# Uncompressed UTF-16-LE
|
||||
strg = str(data[pos:pos+2*nchars], 'utf_16_le')
|
||||
pos += 2*nchars
|
||||
else:
|
||||
# Note: this is COMPRESSED (not ASCII!) encoding!!!
|
||||
strg = str(data[pos:pos+nchars], "latin_1")
|
||||
pos += nchars
|
||||
|
||||
if richtext:
|
||||
pos += 4 * rt
|
||||
|
||||
if phonetic:
|
||||
pos += sz
|
||||
|
||||
return (strg, pos)
|
||||
|
||||
def unpack_cell_range_address_list_update_pos(
|
||||
output_list, data, pos, biff_version, addr_size=6):
|
||||
# output_list is updated in situ
|
||||
if biff_version < 80:
|
||||
assert addr_size == 6
|
||||
else:
|
||||
assert addr_size in (6, 8)
|
||||
n, = unpack("<H", data[pos:pos+2])
|
||||
pos += 2
|
||||
if n:
|
||||
fmt = "<HHBB" if addr_size == 6 else "<HHHH"
|
||||
for _unused in range(n):
|
||||
ra, rb, ca, cb = unpack(fmt, data[pos:pos+addr_size])
|
||||
output_list.append((ra, rb+1, ca, cb+1))
|
||||
pos += addr_size
|
||||
return pos
|
||||
|
||||
def hex_char_dump(strg, ofs, dlen, base=0, fout=sys.stdout, unnumbered=False):
|
||||
endpos = min(ofs + dlen, len(strg))
|
||||
pos = ofs
|
||||
numbered = not unnumbered
|
||||
num_prefix = ''
|
||||
while pos < endpos:
|
||||
endsub = min(pos + 16, endpos)
|
||||
substrg = strg[pos:endsub]
|
||||
lensub = endsub - pos
|
||||
if lensub <= 0 or lensub != len(substrg):
|
||||
fprintf(
|
||||
sys.stdout,
|
||||
'??? hex_char_dump: ofs=%d dlen=%d base=%d -> endpos=%d pos=%d endsub=%d substrg=%r\n',
|
||||
ofs, dlen, base, endpos, pos, endsub, substrg)
|
||||
break
|
||||
hexd = ''.join(["%02x " % c for c in substrg])
|
||||
chard = ''
|
||||
for c in substrg:
|
||||
if c == ord('\0'):
|
||||
c = '~'
|
||||
elif not (' ' <= chr(c) <= '~'):
|
||||
c = '?'
|
||||
if isinstance(c, int):
|
||||
c = chr(c)
|
||||
chard += c
|
||||
if numbered:
|
||||
num_prefix = "%5d: " % (base+pos-ofs)
|
||||
fprintf(fout, "%s %-48s %s\n", num_prefix, hexd, chard)
|
||||
pos = endsub
|
||||
|
||||
def biff_dump(mem, stream_offset, stream_len, base=0, fout=sys.stdout,
|
||||
unnumbered=False):
|
||||
pos = stream_offset
|
||||
stream_end = stream_offset + stream_len
|
||||
adj = base - stream_offset
|
||||
dummies = 0
|
||||
numbered = not unnumbered
|
||||
num_prefix = ''
|
||||
while stream_end - pos >= 4:
|
||||
rc, length = unpack('<HH', mem[pos:pos+4])
|
||||
if rc == 0 and length == 0:
|
||||
if mem[pos:] == '\0' * (stream_end - pos):
|
||||
dummies = stream_end - pos
|
||||
savpos = pos
|
||||
pos = stream_end
|
||||
break
|
||||
|
||||
if dummies:
|
||||
dummies += 4
|
||||
else:
|
||||
savpos = pos
|
||||
dummies = 4
|
||||
pos += 4
|
||||
else:
|
||||
if dummies:
|
||||
if numbered:
|
||||
num_prefix = "%5d: " % (adj + savpos)
|
||||
fprintf(fout, "%s---- %d zero bytes skipped ----\n",
|
||||
num_prefix, dummies)
|
||||
dummies = 0
|
||||
|
||||
recname = biff_rec_name_dict.get(rc, '<UNKNOWN>')
|
||||
if numbered:
|
||||
num_prefix = "%5d: " % (adj + pos)
|
||||
fprintf(fout, "%s%04x %s len = %04x (%d)\n",
|
||||
num_prefix, rc, recname, length, length)
|
||||
pos += 4
|
||||
hex_char_dump(mem, pos, length, adj+pos, fout, unnumbered)
|
||||
pos += length
|
||||
if dummies:
|
||||
if numbered:
|
||||
num_prefix = "%5d: " % (adj + savpos)
|
||||
fprintf(fout, "%s---- %d zero bytes skipped ----\n", num_prefix, dummies)
|
||||
|
||||
if pos < stream_end:
|
||||
if numbered:
|
||||
num_prefix = "%5d: " % (adj + pos)
|
||||
fprintf(fout, "%s---- Misc bytes at end ----\n", num_prefix)
|
||||
hex_char_dump(mem, pos, stream_end-pos, adj + pos, fout, unnumbered)
|
||||
elif pos > stream_end:
|
||||
fprintf(fout, "Last dumped record has length (%d) that is too large\n", length)
|
||||
|
||||
def biff_count_records(mem, stream_offset, stream_len, fout=sys.stdout):
|
||||
pos = stream_offset
|
||||
stream_end = stream_offset + stream_len
|
||||
tally = {}
|
||||
while stream_end - pos >= 4:
|
||||
rc, length = unpack('<HH', mem[pos:pos+4])
|
||||
if rc == 0 and length == 0:
|
||||
if mem[pos:] == '\0' * (stream_end - pos):
|
||||
break
|
||||
recname = "<Dummy (zero)>"
|
||||
else:
|
||||
recname = biff_rec_name_dict.get(rc, None)
|
||||
if recname is None:
|
||||
recname = "Unknown_0x%04X" % rc
|
||||
if recname in tally:
|
||||
tally[recname] += 1
|
||||
else:
|
||||
tally[recname] = 1
|
||||
pos += length + 4
|
||||
for recname, count in sorted(tally.items()):
|
||||
fprintf(fout, "%8d %s", count, recname)
|
|
@ -0,0 +1,346 @@
|
|||
# Implements the minimal functionality required
|
||||
# to extract a "Workbook" or "Book" stream (as one big string)
|
||||
# from an OLE2 Compound Document file.
|
||||
#
|
||||
# Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd
|
||||
# This module is part of the xlrd3 package, which is released under a BSD-style licence.
|
||||
|
||||
# No part of the content of this file was derived from the works of David Giffin.
|
||||
|
||||
# 2008-11-04 SJM Avoid assertion error when -1 used instead of -2 for first_SID of empty SCSS [Frank Hoffsuemmer]
|
||||
# 2007-09-08 SJM Warning message if sector sizes are extremely large.
|
||||
# 2007-05-07 SJM Meaningful exception instead of IndexError if a SAT (sector allocation table) is corrupted.
|
||||
# 2007-04-22 SJM Missing "<" in a struct.unpack call => can't open files on bigendian platforms.
|
||||
|
||||
|
||||
import sys
|
||||
from struct import unpack
|
||||
|
||||
# Magic cookie that should appear in the first 8 bytes of the file.
|
||||
SIGNATURE = b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"
|
||||
|
||||
EOCSID = -2
|
||||
FREESID = -1
|
||||
SATSID = -3
|
||||
MSATSID = -4
|
||||
|
||||
def int_floor_div(x, y):
|
||||
return divmod(x, y)[0]
|
||||
|
||||
class CompDocError(Exception):
|
||||
pass
|
||||
|
||||
class DirNode(object):
|
||||
|
||||
def __init__(self, DID, dent, DEBUG=0):
|
||||
# dent is the 128-byte directory entry
|
||||
self.DID = DID
|
||||
# (cbufsize, self.etype, self.colour, self.left_DID, self.right_DID,
|
||||
# self.root_DID,
|
||||
# self.first_SID,
|
||||
# self.tot_size) = \
|
||||
# unpack('<HBBiii16x4x8x8xii4x', dent[64:128])
|
||||
(cbufsize, self.etype, self.colour, self.left_DID, self.right_DID,
|
||||
self.root_DID) = \
|
||||
unpack('<HBBiii', dent[64:80])
|
||||
(self.first_SID, self.tot_size) = \
|
||||
unpack('<ii', dent[116:124])
|
||||
if cbufsize == 0:
|
||||
self.name = ''
|
||||
else:
|
||||
self.name = str(dent[0:cbufsize-2], 'utf_16_le') # omit the trailing U+0000
|
||||
self.children = [] # filled in later
|
||||
self.parent = -1 # indicates orphan; fixed up later
|
||||
self.tsinfo = unpack('<IIII', dent[100:116])
|
||||
if DEBUG:
|
||||
self.dump(DEBUG)
|
||||
|
||||
def dump(self, DEBUG=1):
|
||||
print("DID=%d name=%r etype=%d DIDs(left=%d right=%d root=%d parent=%d kids=%r) first_SID=%d tot_size=%d" \
|
||||
% (self.DID, self.name, self.etype, self.left_DID,
|
||||
self.right_DID, self.root_DID, self.parent, self.children, self.first_SID, self.tot_size))
|
||||
if DEBUG == 2:
|
||||
# cre_lo, cre_hi, mod_lo, mod_hi = tsinfo
|
||||
print("timestamp info", self.tsinfo)
|
||||
|
||||
def _build_family_tree(dirlist, parent_DID, child_DID):
|
||||
if child_DID < 0: return
|
||||
_build_family_tree(dirlist, parent_DID, dirlist[child_DID].left_DID)
|
||||
dirlist[parent_DID].children.append(child_DID)
|
||||
dirlist[child_DID].parent = parent_DID
|
||||
_build_family_tree(dirlist, parent_DID, dirlist[child_DID].right_DID)
|
||||
if dirlist[child_DID].etype == 1: # storage
|
||||
_build_family_tree(dirlist, child_DID, dirlist[child_DID].root_DID)
|
||||
|
||||
# Compound document handler.
|
||||
# @param mem The raw contents of the file, as a string, or as an mmap.mmap() object. The
|
||||
# only operation it needs to support is slicing.
|
||||
|
||||
class CompDoc(object):
|
||||
|
||||
def __init__(self, mem, logfile=sys.stdout, DEBUG=0):
|
||||
self.logfile = logfile
|
||||
if mem[0:8] != SIGNATURE:
|
||||
raise CompDocError('Not an OLE2 compound document')
|
||||
if mem[28:30] != b'\xFE\xFF':
|
||||
raise CompDocError('Expected "little-endian" marker, found %r' % mem[28:30])
|
||||
revision, version = unpack('<HH', mem[24:28])
|
||||
if DEBUG:
|
||||
print("\nCompDoc format: version=0x%04x revision=0x%04x" % (version, revision), file=logfile)
|
||||
self.mem = mem
|
||||
ssz, sssz = unpack('<HH', mem[30:34])
|
||||
if ssz > 20: # allows for 2**20 bytes i.e. 1MB
|
||||
print("WARNING: sector size (2**%d) is preposterous; assuming 512 and continuing ..." \
|
||||
% ssz, file=logfile)
|
||||
ssz = 9
|
||||
if sssz > ssz:
|
||||
print("WARNING: short stream sector size (2**%d) is preposterous; assuming 64 and continuing ..." \
|
||||
% sssz, file=logfile)
|
||||
sssz = 6
|
||||
self.sec_size = sec_size = 1 << ssz
|
||||
self.short_sec_size = 1 << sssz
|
||||
(
|
||||
SAT_tot_secs, self.dir_first_sec_sid, _unused, self.min_size_std_stream,
|
||||
SSAT_first_sec_sid, SSAT_tot_secs,
|
||||
MSAT_first_sec_sid, MSAT_tot_secs,
|
||||
# ) = unpack('<ii4xiiiii', mem[44:76])
|
||||
) = unpack('<iiiiiiii', mem[44:76])
|
||||
mem_data_len = len(mem) - 512
|
||||
mem_data_secs, left_over = divmod(mem_data_len, sec_size)
|
||||
if left_over:
|
||||
#### raise CompDocError("Not a whole number of sectors")
|
||||
print("WARNING *** file size (%d) not 512 + multiple of sector size (%d)" \
|
||||
% (len(mem), sec_size), file=logfile)
|
||||
if DEBUG:
|
||||
print('sec sizes', ssz, sssz, sec_size, self.short_sec_size, file=logfile)
|
||||
print("mem data: %d bytes == %d sectors" % (mem_data_len, mem_data_secs), file=logfile)
|
||||
print("SAT_tot_secs=%d, dir_first_sec_sid=%d, min_size_std_stream=%d" \
|
||||
% (SAT_tot_secs, self.dir_first_sec_sid, self.min_size_std_stream,), file=logfile)
|
||||
print("SSAT_first_sec_sid=%d, SSAT_tot_secs=%d" % (SSAT_first_sec_sid, SSAT_tot_secs,), file=logfile)
|
||||
print("MSAT_first_sec_sid=%d, MSAT_tot_secs=%d" % (MSAT_first_sec_sid, MSAT_tot_secs,), file=logfile)
|
||||
nent = int_floor_div(sec_size, 4) # number of SID entries in a sector
|
||||
fmt = "<%di" % nent
|
||||
trunc_warned = 0
|
||||
#
|
||||
# === build the MSAT ===
|
||||
#
|
||||
MSAT = list(unpack('<109i', mem[76:512]))
|
||||
sid = MSAT_first_sec_sid
|
||||
while sid >= 0:
|
||||
if sid >= mem_data_secs:
|
||||
raise CompDocError(
|
||||
"MSAT extension: accessing sector %d but only %d in file" % (sid, mem_data_secs)
|
||||
)
|
||||
offset = 512 + sec_size * sid
|
||||
news = list(unpack(fmt, mem[offset:offset+sec_size]))
|
||||
sid = news.pop()
|
||||
MSAT.extend(news)
|
||||
if DEBUG:
|
||||
print("MSAT: len =", len(MSAT), file=logfile)
|
||||
print(MSAT, file=logfile)
|
||||
#
|
||||
# === build the SAT ===
|
||||
#
|
||||
self.SAT = []
|
||||
for msid in MSAT:
|
||||
if msid == FREESID: continue
|
||||
if msid >= mem_data_secs:
|
||||
if not trunc_warned:
|
||||
print("WARNING *** File is truncated, or OLE2 MSAT is corrupt!!", file=logfile)
|
||||
print("INFO: Trying to access sector %d but only %d available" \
|
||||
% (msid, mem_data_secs), file=logfile)
|
||||
trunc_warned = 1
|
||||
continue
|
||||
offset = 512 + sec_size * msid
|
||||
news = list(unpack(fmt, mem[offset:offset+sec_size]))
|
||||
self.SAT.extend(news)
|
||||
if DEBUG:
|
||||
print("SAT: len =", len(self.SAT), file=logfile)
|
||||
print(self.SAT, file=logfile)
|
||||
# print >> logfile, "SAT ",
|
||||
# for i, s in enumerate(self.SAT):
|
||||
# print >> logfile, "entry: %4d offset: %6d, next entry: %4d" % (i, 512 + sec_size * i, s)
|
||||
# print >> logfile, "%d:%d " % (i, s),
|
||||
print()
|
||||
|
||||
# === build the directory ===
|
||||
#
|
||||
dbytes = self._get_stream(
|
||||
self.mem, 512, self.SAT, self.sec_size, self.dir_first_sec_sid,
|
||||
name="directory")
|
||||
dirlist = []
|
||||
did = -1
|
||||
for pos in range(0, len(dbytes), 128):
|
||||
did += 1
|
||||
dirlist.append(DirNode(did, dbytes[pos:pos+128], 0))
|
||||
self.dirlist = dirlist
|
||||
_build_family_tree(dirlist, 0, dirlist[0].root_DID) # and stand well back ...
|
||||
if DEBUG:
|
||||
for d in dirlist:
|
||||
d.dump(DEBUG)
|
||||
#
|
||||
# === get the SSCS ===
|
||||
#
|
||||
sscs_dir = self.dirlist[0]
|
||||
assert sscs_dir.etype == 5 # root entry
|
||||
if sscs_dir.first_SID < 0 and sscs_dir.tot_size == 0:
|
||||
# Problem reported by Frank Hoffsuemmer: some software was
|
||||
# writing -1 instead of -2 (EOCSID) for the first_SID
|
||||
# when the SCCS was empty. Not having EOCSID caused assertion
|
||||
# failure in _get_stream.
|
||||
# Solution: avoid calling _get_stream in any case when the
|
||||
# SCSS appears to be empty.
|
||||
self.SSCS = ""
|
||||
else:
|
||||
self.SSCS = self._get_stream(
|
||||
self.mem, 512, self.SAT, sec_size, sscs_dir.first_SID,
|
||||
sscs_dir.tot_size, name="SSCS")
|
||||
# if DEBUG: print >> logfile, "SSCS", repr(self.SSCS)
|
||||
#
|
||||
# === build the SSAT ===
|
||||
#
|
||||
self.SSAT = []
|
||||
if SSAT_tot_secs > 0 and sscs_dir.tot_size == 0:
|
||||
print("WARNING *** OLE2 inconsistency: SSCS size is 0 but SSAT size is non-zero", file=logfile)
|
||||
if sscs_dir.tot_size > 0:
|
||||
sid = SSAT_first_sec_sid
|
||||
nsecs = SSAT_tot_secs
|
||||
while sid >= 0 and nsecs > 0:
|
||||
nsecs -= 1
|
||||
start_pos = 512 + sid * sec_size
|
||||
news = list(unpack(fmt, mem[start_pos:start_pos+sec_size]))
|
||||
self.SSAT.extend(news)
|
||||
sid = self.SAT[sid]
|
||||
# assert SSAT_tot_secs == 0 or sid == EOCSID
|
||||
if DEBUG: print("SSAT last sid %d; remaining sectors %d" % (sid, nsecs), file=logfile)
|
||||
assert nsecs == 0 and sid == EOCSID
|
||||
if DEBUG: print("SSAT", self.SSAT, file=logfile)
|
||||
|
||||
def _get_stream(self, mem, base, sat, sec_size, start_sid, size=None, name=''):
|
||||
# print >> self.logfile, "_get_stream", base, sec_size, start_sid, size
|
||||
sectors = []
|
||||
s = start_sid
|
||||
if size is None:
|
||||
# nothing to check against
|
||||
while s >= 0:
|
||||
start_pos = base + s * sec_size
|
||||
sectors.append(mem[start_pos:start_pos+sec_size])
|
||||
try:
|
||||
s = sat[s]
|
||||
except IndexError:
|
||||
raise CompDocError(
|
||||
"OLE2 stream %r: sector allocation table invalid entry (%d)" %
|
||||
(name, s)
|
||||
)
|
||||
assert s == EOCSID
|
||||
else:
|
||||
todo = size
|
||||
while s >= 0:
|
||||
start_pos = base + s * sec_size
|
||||
grab = sec_size
|
||||
if grab > todo:
|
||||
grab = todo
|
||||
todo -= grab
|
||||
sectors.append(mem[start_pos:start_pos+grab])
|
||||
try:
|
||||
s = sat[s]
|
||||
except IndexError:
|
||||
raise CompDocError(
|
||||
"OLE2 stream %r: sector allocation table invalid entry (%d)" %
|
||||
(name, s)
|
||||
)
|
||||
assert s == EOCSID
|
||||
if todo != 0:
|
||||
print("WARNING *** OLE2 stream %r: expected size %d, actual size %d" \
|
||||
% (name, size, size - todo), file=self.logfile)
|
||||
return b''.join(sectors)
|
||||
|
||||
def _dir_search(self, path, storage_DID=0):
    """Recursively resolve *path* (a list of name components) against the
    directory tree rooted at *storage_DID*.

    Returns the matching DirNode instance, or None when no child of the
    storage matches the first component. Name comparison is
    case-insensitive. Raises CompDocError when the path resolves to a
    storage instead of a user stream, or to an entry of unexpected type.
    """
    head = path[0]
    tail = path[1:]
    dl = self.dirlist
    for child in dl[storage_DID].children:
        entry = dl[child]
        if entry.name.lower() != head.lower():
            continue
        et = entry.etype
        if et == 2:
            # User stream: this is the leaf we were looking for.
            return entry
        if et == 1:
            # Storage (folder): descend, but only if components remain.
            if not tail:
                raise CompDocError("Requested component is a 'storage'")
            return self._dir_search(tail, child)
        # Neither stream nor storage: dump it for diagnosis and bail out.
        entry.dump(1)
        raise CompDocError("Requested stream is not a 'user stream'")
    return None
|
||||
##
|
||||
# Interrogate the compound document's directory; return the stream as a string if found, otherwise
|
||||
# return None.
|
||||
# @param qname Name of the desired stream e.g. u'Workbook'. Should be in Unicode or convertible thereto.
|
||||
|
||||
def get_named_stream(self, qname):
    """Interrogate the compound document's directory; return the named
    stream's bytes if found, otherwise None.

    qname -- stream name, e.g. u'Workbook'; '/'-separated for nested
             storages.
    """
    node = self._dir_search(qname.split("/"))
    if node is None:
        return None
    if node.tot_size >= self.min_size_std_stream:
        # Large enough to live in ordinary sectors.
        return self._get_stream(
            self.mem, 512, self.SAT, self.sec_size, node.first_SID,
            node.tot_size, name=qname)
    # Small stream: stored in the short-stream container (SSCS).
    return self._get_stream(
        self.SSCS, 0, self.SSAT, self.short_sec_size, node.first_SID,
        node.tot_size, name=qname + " (from SSCS)")
|
||||
# Interrogate the compound document's directory.
|
||||
# If the named stream is not found, (None, 0, 0) will be returned.
|
||||
# If the named stream is found and is contiguous within the original byte sequence ("mem")
|
||||
# used when the document was opened,
|
||||
# then (mem, offset_to_start_of_stream, length_of_stream) is returned.
|
||||
# Otherwise a new string is built from the fragments and (new_string, 0, length_of_stream) is returned.
|
||||
# @param qname Name of the desired stream e.g. u'Workbook'. Should be in Unicode or convertible thereto.
|
||||
|
||||
def locate_named_stream(self, qname):
    """Interrogate the compound document's directory for *qname*.

    Returns (None, 0, 0) when the stream is not found.
    When the stream is contiguous within the original byte sequence
    ("mem"), returns (mem, offset_to_start_of_stream, length_of_stream)
    without copying. Otherwise a new bytes object is built from the
    fragments and (new_bytes, 0, length_of_stream) is returned.

    qname -- stream name, e.g. u'Workbook'; '/'-separated for nested
             storages.
    """
    d = self._dir_search(qname.split("/"))
    if d is None:
        return (None, 0, 0)
    if d.tot_size >= self.min_size_std_stream:
        return self._locate_stream(self.mem, 512, self.SAT, self.sec_size,
                                   d.first_SID, d.tot_size)
    # Short streams live in the SSCS, which is itself already a copy,
    # so they are always materialised via _get_stream.
    # BUG FIX: removed an unreachable trailing "return (None, 0, 0)" that
    # followed this exhaustive if/else (dead code).
    return (
        self._get_stream(
            self.SSCS, 0, self.SSAT, self.short_sec_size, d.first_SID,
            d.tot_size, qname + " (from SSCS)"),
        0,
        d.tot_size,
    )
def _locate_stream(self, mem, base, sat, sec_size, start_sid, size):
    """Locate a stream's data, avoiding a copy when it is contiguous.

    Walks the sector chain starting at start_sid, coalescing runs of
    consecutive SIDs into (start_pos, end_pos) slices. If the whole
    stream turns out to be one run, returns (mem, start_pos, size) —
    a zero-copy reference into the original buffer. Otherwise joins
    the slices into a new bytes object and returns (new_bytes, 0, size).
    """
    # print >> self.logfile, "_locate_stream", base, sec_size, start_sid, size
    s = start_sid
    if s < 0:
        raise CompDocError("_locate_stream: start_sid (%d) is -ve" % start_sid)
    p = -99 # dummy previous SID
    start_pos = -9999
    end_pos = -8888
    slices = []
    while s >= 0:
        if s == p+1:
            # contiguous sectors: extend the current run
            end_pos += sec_size
        else:
            # start new slice
            if p >= 0:
                # not first time: close off the previous run
                slices.append((start_pos, end_pos))
            start_pos = base + s * sec_size
            end_pos = start_pos + sec_size
        p = s
        s = sat[s]
    assert s == EOCSID
    # print >> self.logfile, len(slices) + 1, "slices"
    if not slices:
        # The stream is contiguous ... just what we like!
        return (mem, start_pos, size)
    slices.append((start_pos, end_pos))
    return (b''.join([mem[start_pos:end_pos] for start_pos, end_pos in slices]), 0, size)
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,276 @@
|
|||
# Author: mozman <mozman@gmx.at>
|
||||
# Purpose: xfcell -- cell with convenient xf function
|
||||
# Created: 04.12.2010
|
||||
# Copyright (C) 2010, Manfred Moitzi
|
||||
# License: BSD-style licence
|
||||
|
||||
"""
|
||||
The XFCell() object contains the data for one cell.
|
||||
|
||||
WARNING: You don't call this class yourself. You access Cell objects
|
||||
via methods of the Sheet object(s) that you found in the Book object that
|
||||
was returned when you called xlrd.open_workbook("myfile.xls").
|
||||
|
||||
Cell objects have four attributes: `ctype` is an int, `value` (which depends
|
||||
on `ctype`), `xf_index` and `sheet`, a reference to the containing sheet. If
|
||||
**formatting_info** is not enabled when the workbook is opened, xf_index will
|
||||
be **None**.
|
||||
|
||||
The following table describes the types of cells and how their values
|
||||
are represented in Python.
|
||||
|
||||
=============== ===== ============ ==========================================
|
||||
Type symbol Const Python value Note
|
||||
=============== ===== ============ ==========================================
|
||||
XL_CELL_EMPTY 0 ""
|
||||
XL_CELL_TEXT 1 str
|
||||
XL_CELL_NUMBER 2 float
|
||||
XL_CELL_DATE 3 float
|
||||
XL_CELL_BOOLEAN 4 int 1 means TRUE, 0 means FALSE
|
||||
XL_CELL_ERROR 5 int representing internal Excel codes; for a
|
||||
text representation, refer to the supplied
|
||||
dictionary error_text_from_code
|
||||
XL_CELL_BLANK 6 "" this type will appear only when
|
||||
open_workbook(..., formatting_info=True)
|
||||
is used.
|
||||
=============== ===== ============ ==========================================
|
||||
"""
|
||||
|
||||
import datetime
|
||||
|
||||
from .xldate import xldate_as_tuple
|
||||
from .biffh import XL_CELL_DATE, BaseObject
|
||||
|
||||
class XFCell(BaseObject):
    """Extended Cell() class with convenient methods for easy access of cell
    formatting properties.

    WARNING: You don't call this class yourself. You access Cell objects
    via methods of the Sheet object(s) found in the Book returned by
    xlrd.open_workbook("myfile.xls").

    Attributes:
        ctype -- int cell type (XL_CELL_* constant)
        value -- Python value; its representation depends on ctype:
                 EMPTY/BLANK -> "", TEXT -> str, NUMBER/DATE -> float,
                 BOOLEAN -> int (1 true, 0 false), ERROR -> int code
        xf    -- resolved XF record, or None when the workbook was opened
                 without formatting_info
        sheet -- the containing Sheet (may be None)
    """
    # __slots__ keeps per-cell memory low; sheets can hold many cells.
    __slots__ = ['sheet', 'ctype', 'value', 'xf']

    def __init__(self, ctype, value, xf_index=None, sheet=None):
        self.sheet = sheet
        self.ctype = ctype
        self.value = value
        # Resolve the XF record eagerly via the owning book; stays None
        # when formatting information is unavailable.
        if xf_index is not None:
            self.xf = self.book.xf_list[xf_index]
        else:
            self.xf = None

    @property
    def book(self):
        """The Book that owns the containing sheet."""
        return self.sheet.book

    @property
    def has_xf(self):
        """True when formatting information is available for this cell."""
        return (self.xf is not None)

    @property
    def xf_index(self):
        """Index of the cell's XF record, or None when unavailable."""
        if self.has_xf:
            return self.xf.xf_index
        else:
            return None

    @property
    def parent_style(self):
        """The style XF record this cell's XF inherits from."""
        return self.book.xf_list[self.xf.parent_style_index]

    @property
    def is_datetime(self):
        """True when the cell type is XL_CELL_DATE."""
        return self.ctype == XL_CELL_DATE

    @property
    def has_date(self):
        """True when a date cell carries a date part (serial > 1.0);
        values in [0, 1] encode only a time of day."""
        if self.is_datetime:
            return self.value > 1.
        return False

    def get_color(self, index):
        """Map a colour index to an (R, G, B) tuple via the book's palette."""
        return self.book.colour_map[index]

    def datetime(self):
        """Return a datetime.datetime object if cell type is XL_CELL_DATE,
        else raise a TypeError; raise ValueError if the cell has no date
        value (only a time value is present).
        """
        if self.is_datetime:
            if self.has_date:
                date = xldate_as_tuple(self.value, self.book.datemode)
                return datetime.datetime(*date)
            else:
                raise ValueError("Cell has no date value.")
        else:
            raise TypeError("Cell is not a XL_CELL_DATE.")

    def date(self):
        """Return a datetime.date object if cell type is XL_CELL_DATE,
        else raise TypeError; raise ValueError if the cell has no date
        value (only a time value is present).
        """
        dt = self.datetime()
        return dt.date()

    def time(self):
        """Return a datetime.time object if cell type is XL_CELL_DATE,
        else raise TypeError.
        """
        if self.is_datetime:
            date = xldate_as_tuple(self.value, self.book.datemode)
            return datetime.time(date[3], date[4], date[5])
        else:
            raise TypeError("Cell is not a XL_CELL_DATE.")

    #
    # access the XFBackground() class
    #
    # NOTE: each group below follows the same inheritance scheme: a style
    # XF with flag == 0 supplies its own attribute; a cell XF supplies its
    # own attribute only when its flag is set, otherwise the attribute is
    # inherited from the parent style XF.

    @property
    def background(self):
        if self.xf.is_style and \
           self.xf._background_flag == 0:
            return self.xf.background
        elif self.xf._background_flag:
            return self.xf.background
        else:
            return self.parent_style.background

    def background_color(self):
        """ Get cell background-color as 3-tuple. """
        color_index = self.xf.background.background_colour_index
        return self.get_color(color_index)

    def fill_pattern(self):
        """ Get the background fill-pattern code. """
        return self.xf.background.fill_pattern

    def pattern_color(self):
        """ Get the fill-pattern colour as 3-tuple. """
        color_index = self.xf.background.pattern_colour_index
        return self.get_color(color_index)

    #
    # access the Font() class
    #

    @property
    def font_index(self):
        if self.xf.is_style and \
           self.xf._font_flag == 0:
            return self.xf.font_index
        elif self.xf._font_flag:
            return self.xf.font_index
        else:
            return self.parent_style.font_index

    @property
    def font(self):
        """ Get the Font() class. """
        return self.book.font_list[self.xf.font_index]

    def font_color(self):
        """ Get cell foreground-color as 3-tuple. """
        return self.get_color(self.font.colour_index)

    #
    # access the Format() class
    #

    @property
    def format_key(self):
        if self.xf.is_style and \
           self.xf._format_flag == 0:
            return self.xf.format_key
        elif self.xf._format_flag:
            return self.xf.format_key
        else:
            return self.parent_style.format_key

    @property
    def format(self):
        """ Get the Format() class. """
        return self.book.format_map[self.format_key]

    def format_str(self):
        """ Get the associated 'format_str'. """
        return self.format.format_str

    #
    # access the XFAligment() class
    #

    @property
    def alignment(self):
        if self.xf.is_style and \
           self.xf._alignment_flag == 0:
            return self.xf.alignment
        elif self.xf._alignment_flag:
            return self.xf.alignment
        else:
            return self.parent_style.alignment

    #
    # access the XFBorder() class
    #

    @property
    def border(self):
        if self.xf.is_style and \
           self.xf._border_flag == 0:
            return self.xf.border
        elif self.xf._border_flag:
            return self.xf.border
        else:
            return self.parent_style.border

    def bordercolors(self):
        """ Get border color as dict of rgb-color-tuples. """
        border = self.border
        return {
            'top': self.get_color(border.top_colour_index),
            'bottom': self.get_color(border.bottom_colour_index),
            'left': self.get_color(border.left_colour_index),
            'right': self.get_color(border.right_colour_index),
            'diag': self.get_color(border.diag_colour_index),
        }

    def borderstyles(self):
        """ Get border styles as dict of ints. """
        border = self.border
        return {
            'top': border.top_line_style,
            'bottom': border.bottom_line_style,
            'left': border.left_line_style,
            'right': border.right_line_style,
            'diag': border.diag_line_style,
        }

    @property
    def has_up_diag(self):
        """ Draw a line across the cell from bottom left to top right. """
        return bool(self.border.diag_up)

    @property
    def has_down_diag(self):
        """ Draw a line across the cell from top left to bottom right. """
        return bool(self.border.diag_down)

    #
    # access the XFProtection() class
    #

    @property
    def protection(self):
        if self.xf.is_style and \
           self.xf._protection_flag == 0:
            return self.xf.protection
        elif self.xf._protection_flag:
            return self.xf.protection
        else:
            return self.parent_style.protection

    @property
    def is_cell_locked(self):
        return bool(self.protection.cell_locked)

    @property
    def is_formula_hidden(self):
        # BUG FIX: previously returned protection.cell_locked — a
        # copy-paste of is_cell_locked. The formula-hidden state lives in
        # the protection record's formula_hidden flag.
        return bool(self.protection.formula_hidden)
|
@ -0,0 +1,84 @@
|
|||
# Author: mozman <mozman@gmx.at>
|
||||
# Purpose: xfconst -- xf constants
|
||||
# Created: 05.12.2010
|
||||
# Copyright (C) 2010, Manfred Moitzi
|
||||
# License: BSD-style licence
|
||||
|
||||
# Horizontal alignment codes for cell contents.
# excelfileformat section 5.115.1 pg. 219
HOR_ALIGN_GENERAL = 0
HOR_ALIGN_LEFT = 1
HOR_ALIGN_CENTRED = 2
HOR_ALIGN_RIGHT = 3
HOR_ALIGN_FILLED = 4
HOR_ALIGN_JUSTIFIED = 5
HOR_ALIGN_CENTRED_ACROSS_SELECTION = 6
HOR_ALIGN_DISTRIBUTED = 7

# Vertical alignment codes for cell contents.
# excelfileformat section 5.115.1 pg. 220
VERT_ALIGN_TOP = 0
VERT_ALIGN_CENTRED = 1
VERT_ALIGN_BOTTOM = 2
VERT_ALIGN_JUSTIFIED = 3
VERT_ALIGN_DISTRIBUTED = 4

# Text orientation codes.
# excelfileformat section 5.115.1 pg. 220
ORIENTATION_NONE = 0
ORIENTATION_STACKED = 1
ORIENTATION_90_COUNTERCLOCKWISE = 2
ORIENTATION_90_CLOCKWISE = 3

# Text rotation codes.
# excelfileformat section 5.115.1 pg. 220
ROTATION_NONE = 0
ROTATION_STACKED = 255
# other values:
# Value    Description
# =======  =================================
# 1-90     1 to 90 degrees counterclockwise
# 91-180   1 to 90 degrees clockwise

# Line styles for cell borders (LS_*). The ASCII art next to each value
# sketches the visual style.
# excelfileformat section 2.5.11 Line Styles for Cell Borders pg. 24
LS_NOLINE = 0

# solid line
LS_THIN = 1

# solid line
LS_MEDIUM = 2

# - - - - - - - - -
LS_DASHED = 3

# .................
LS_DOTTED = 4

# solid line
LS_THICK = 5

# =================
LS_DOUBLE = 6

# very thin dotted
LS_HAIR = 7

# - - - - - - - - -
LS_MEDIUM_DASHED = 8

# - . - . - . - . -
LS_THIN_DASH_DOTTED = 9

# - . - . - . - . -
LS_MEDIUM_DASH_DOTTED = 10

# - .. - .. - .. - .. -
LS_THIN_DASH_DOT_DOTTED = 11

# - .. - .. - .. - .. -
LS_MEDIUM_DASH_DOT_DOTTED = 12

# \\\ . \\\ . \\\ . \\\ . \\\
LS_SLANTED_MEDIUM_DASH_DOTTED = 13
|
||||
|
||||
|
|
@ -0,0 +1,167 @@
|
|||
# No part of the content of this file was derived from the works of David Giffin.
|
||||
#
|
||||
# Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd
|
||||
# This module is part of the xlrd3 package, which is released under a
|
||||
# BSD-style licence.
|
||||
#
|
||||
# Provides function(s) for dealing with Microsoft Excel ™ dates.
|
||||
#
|
||||
# 2008-10-18 SJM Fix bug in xldate_from_date_tuple (affected some years after 2099)
|
||||
#
|
||||
# The conversion from days to (year, month, day) starts with
|
||||
# an integral "julian day number" aka JDN.
|
||||
# FWIW, JDN 0 corresponds to noon on Monday November 24 in Gregorian year -4713.
|
||||
# More importantly:
|
||||
# Noon on Gregorian 1900-03-01 (day 61 in the 1900-based system) is JDN 2415080.0
|
||||
# Noon on Gregorian 1904-01-02 (day 1 in the 1904-based system) is JDN 2416482.0
|
||||
|
||||
def ifd(x, y):
    """Integer floor division (helper kept for historical call sites)."""
    return x // y
||||
_JDN_delta = (2415080 - 61, 2416482 - 1)
|
||||
assert _JDN_delta[1] - _JDN_delta[0] == 1462
|
||||
|
||||
# Exception hierarchy for Excel date conversions: all specific errors
# derive from XLDateError, which itself derives from ValueError.
class XLDateError(ValueError): pass

# xldate < 0.0
class XLDateNegative(XLDateError): pass
# 1900 leap-year problem: datemode == 0 and 1.0 <= xldate < 61.0
class XLDateAmbiguous(XLDateError): pass
# Gregorian year 10000 or later
class XLDateTooLarge(XLDateError): pass
# datemode arg is neither 0 nor 1
class XLDateBadDatemode(XLDateError): pass
# invalid (year, month, day) / (hour, minute, second) component(s)
class XLDateBadTuple(XLDateError): pass

# Serial-day limits per datemode; both are equivalent to 10000-01-01.
_XLDAYS_TOO_LARGE = (2958466, 2958466 - 1462) # This is equivalent to 10000-01-01
||||
|
||||
# Convert an Excel number (presumed to represent a date, a datetime or a time) into
|
||||
# a tuple suitable for feeding to datetime or mx.DateTime constructors.
|
||||
# @param xldate The Excel number
|
||||
# @param datemode 0: 1900-based, 1: 1904-based.
|
||||
# <br>WARNING: when using this function to
|
||||
# interpret the contents of a workbook, you should pass in the Book.datemode
|
||||
# attribute of that workbook. Whether
|
||||
# the workbook has ever been anywhere near a Macintosh is irrelevant.
|
||||
# @return Gregorian (year, month, day, hour, minute, nearest_second).
|
||||
# <br>Special case: if 0.0 <= xldate < 1.0, it is assumed to represent a time;
|
||||
# (0, 0, 0, hour, minute, second) will be returned.
|
||||
# <br>Note: 1904-01-01 is not regarded as a valid date in the datemode 1 system; its "serial number"
|
||||
# is zero.
|
||||
# @throws XLDateNegative xldate < 0.00
|
||||
# @throws XLDateAmbiguous The 1900 leap-year problem (datemode == 0 and 1.0 <= xldate < 61.0)
|
||||
# @throws XLDateTooLarge Gregorian year 10000 or later
|
||||
# @throws XLDateBadDatemode datemode arg is neither 0 nor 1
|
||||
# @throws XLDateError Covers the 4 specific errors
|
||||
|
||||
def xldate_as_tuple(xldate, datemode):
    """Convert an Excel serial date/time number to a Gregorian
    (year, month, day, hour, minute, nearest_second) tuple.

    datemode -- 0: 1900-based system, 1: 1904-based system.
    If 0.0 <= xldate < 1.0 the value encodes a time only and
    (0, 0, 0, hour, minute, second) is returned.

    Raises XLDateBadDatemode, XLDateNegative, XLDateTooLarge or
    XLDateAmbiguous (all subclasses of XLDateError) on bad input.
    """
    if datemode not in (0, 1):
        raise XLDateBadDatemode(datemode)
    if xldate == 0.00:
        return (0, 0, 0, 0, 0, 0)
    if xldate < 0.00:
        raise XLDateNegative(xldate)
    # Split the serial number into whole days and a day fraction.
    xldays = int(xldate)
    frac = xldate - xldays
    seconds = int(round(frac * 86400.0))
    assert 0 <= seconds <= 86400
    if seconds == 86400:
        # Rounding pushed the time to exactly midnight of the next day.
        hour = minute = second = 0
        xldays += 1
    else:
        # second = seconds % 60; minutes = seconds // 60
        minutes, second = divmod(seconds, 60)
        # minute = minutes % 60; hour = minutes // 60
        hour, minute = divmod(minutes, 60)
    if xldays >= _XLDAYS_TOO_LARGE[datemode]:
        raise XLDateTooLarge(xldate)

    if xldays == 0:
        # Pure time-of-day value.
        return (0, 0, 0, hour, minute, second)

    if xldays < 61 and datemode == 0:
        # Excel wrongly treats 1900 as a leap year, so serial days 1..60
        # of the 1900-based system cannot be interpreted unambiguously.
        raise XLDateAmbiguous(xldate)

    # Convert to a Julian day number, then derive Gregorian Y/M/D using
    # integer arithmetic (see module header for the JDN anchor points).
    jdn = xldays + _JDN_delta[datemode]
    yreg = (ifd(ifd(jdn * 4 + 274277, 146097) * 3, 4) + jdn + 1363) * 4 + 3
    mp = ifd(yreg % 1461, 4) * 535 + 333
    d = ifd(mp % 16384, 535) + 1
    # mp /= 16384
    mp >>= 14
    # mp is a March-based month index; fold it back to January-based.
    if mp >= 10:
        return (ifd(yreg, 1461) - 4715, mp - 9, d, hour, minute, second)
    else:
        return (ifd(yreg, 1461) - 4716, mp + 3, d, hour, minute, second)
|
||||
# === conversions from date/time to xl numbers
|
||||
|
||||
def _leap(y):
|
||||
if y % 4: return 0
|
||||
if y % 100: return 1
|
||||
if y % 400: return 0
|
||||
return 1
|
||||
|
||||
_days_in_month = (None, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
|
||||
|
||||
# Convert a date tuple (year, month, day) to an Excel date.
|
||||
# @param year Gregorian year.
|
||||
# @param month 1 <= month <= 12
|
||||
# @param day 1 <= day <= last day of that (year, month)
|
||||
# @param datemode 0: 1900-based, 1: 1904-based.
|
||||
# @throws XLDateAmbiguous The 1900 leap-year problem (datemode == 0 and 1.0 <= xldate < 61.0)
|
||||
# @throws XLDateBadDatemode datemode arg is neither 0 nor 1
|
||||
# @throws XLDateBadTuple (year, month, day) is too early/late or has invalid component(s)
|
||||
# @throws XLDateError Covers the specific errors
|
||||
|
||||
def xldate_from_date_tuple(datetuple, datemode):
    """Convert a (year, month, day) tuple to an Excel serial date (float).

    datemode -- 0: 1900-based system, 1: 1904-based system.
    (0, 0, 0) maps to 0.0 as a special case.

    Raises XLDateBadDatemode, XLDateBadTuple or XLDateAmbiguous (all
    subclasses of XLDateError) on bad input.
    """
    (year, month, day) = datetuple
    if datemode not in (0, 1):
        raise XLDateBadDatemode(datemode)

    if year == 0 and month == 0 and day == 0:
        return 0.00

    if not (1900 <= year <= 9999):
        raise XLDateBadTuple("Invalid year: %r" % ((year, month, day),))
    if not (1 <= month <= 12):
        raise XLDateBadTuple("Invalid month: %r" % ((year, month, day),))
    # Day must fit its month; Feb 29 is allowed only in leap years.
    if day < 1 \
    or (day > _days_in_month[month] and not(day == 29 and month == 2 and _leap(year))):
        raise XLDateBadTuple("Invalid day: %r" % ((year, month, day),))

    # Compute the Julian day number using a March-based year/month so the
    # leap day falls at the end of the cycle (standard JDN algorithm).
    Yp = year + 4716
    M = month
    if M <= 2:
        Yp = Yp - 1
        Mp = M + 9
    else:
        Mp = M - 3
    jdn = ifd(1461 * Yp, 4) + ifd(979 * Mp + 16, 32) + \
        day - 1364 - ifd(ifd(Yp + 184, 100) * 3, 4)
    xldays = jdn - _JDN_delta[datemode]
    if xldays <= 0:
        raise XLDateBadTuple("Invalid (year, month, day): %r" % ((year, month, day),))
    if xldays < 61 and datemode == 0:
        # Serial days 1..60 of the 1900 system are ambiguous (Excel's
        # fictitious 1900-02-29), so refuse dates before 1900-03-01.
        raise XLDateAmbiguous("Before 1900-03-01: %r" % ((year, month, day),))
    return float(xldays)
||||
|
||||
# Convert a time tuple (hour, minute, second) to an Excel "date" value (fraction of a day).
|
||||
# @param hour 0 <= hour < 24
|
||||
# @param minute 0 <= minute < 60
|
||||
# @param second 0 <= second < 60
|
||||
# @throws XLDateBadTuple Out-of-range hour, minute, or second
|
||||
|
||||
def xldate_from_time_tuple(timetuple):
    """Convert an (hour, minute, second) tuple to an Excel "date" value,
    i.e. the fraction of a day it represents.

    Raises XLDateBadTuple when any component is out of range
    (0 <= hour < 24, 0 <= minute < 60, 0 <= second < 60).
    """
    (hour, minute, second) = timetuple
    if not (0 <= hour < 24 and 0 <= minute < 60 and 0 <= second < 60):
        raise XLDateBadTuple("Invalid (hour, minute, second): %r" % ((hour, minute, second),))
    return ((second / 60.0 + minute) / 60.0 + hour) / 24.0
||||
|
||||
# Convert a datetime tuple (year, month, day, hour, minute, second) to an Excel date value.
|
||||
# For more details, refer to other xldate_from_*_tuple functions.
|
||||
# @param datetime_tuple (year, month, day, hour, minute, second)
|
||||
# @param datemode 0: 1900-based, 1: 1904-based.
|
||||
|
||||
def xldate_from_datetime_tuple(datetime_tuple, datemode):
    """Convert a (year, month, day, hour, minute, second) tuple to an
    Excel serial date value.

    datemode -- 0: 1900-based system, 1: 1904-based system.
    See xldate_from_date_tuple / xldate_from_time_tuple for the errors
    each half may raise.
    """
    date_part = xldate_from_date_tuple(datetime_tuple[:3], datemode)
    time_part = xldate_from_time_tuple(datetime_tuple[3:])
    return date_part + time_part
|
@ -657,7 +657,7 @@ class TablibTestCase(unittest.TestCase):
|
|||
self.founders.append(('Old', 'Man', 100500))
|
||||
|
||||
self.assertEquals(
|
||||
u"""
|
||||
"""
|
||||
first_name|last_name |gpa
|
||||
----------|----------|------
|
||||
John |Adams |90
|
||||
|
|
Loading…
Reference in New Issue