From d807c603466d05a2288f3bde8454f331084a6649 Mon Sep 17 00:00:00 2001 From: Marc Abramowitz Date: Mon, 14 May 2012 17:14:46 -0700 Subject: [PATCH 1/5] Tweak setup.py for py.test (pytest?) --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 63b1d76..bde8621 100755 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ if sys.argv[-1] == 'test': print('py.test required.') sys.exit(1) - os.system('pytest test_tablib.py') + os.system('py.test test_tablib.py') sys.exit() setup( @@ -70,4 +70,5 @@ setup( 'Programming Language :: Python :: 3.1', 'Programming Language :: Python :: 3.2', ), + tests_require=['pytest'], ) From 6a449d497aa40e6c1aee2d3ce55fce680d1f0a47 Mon Sep 17 00:00:00 2001 From: Marc Abramowitz Date: Mon, 14 May 2012 20:44:23 -0700 Subject: [PATCH 2/5] Add support for tox --- .gitignore | 5 ++++- setup.py | 3 +++ tox.ini | 15 +++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 tox.ini diff --git a/.gitignore b/.gitignore index f17ed41..acf2d9b 100644 --- a/.gitignore +++ b/.gitignore @@ -22,4 +22,7 @@ coverage.xml nosetests.xml junit-py25.xml junit-py26.xml -junit-py27.xml \ No newline at end of file +junit-py27.xml + +# tox noise +.tox diff --git a/setup.py b/setup.py index bde8621..cd6e39c 100755 --- a/setup.py +++ b/setup.py @@ -49,6 +49,9 @@ setup( 'tablib', 'tablib.formats', 'tablib.packages', 'tablib.packages.xlwt', + 'tablib.packages.xlrd', + 'tablib.packages.omnijson', + 'tablib.packages.odf', 'tablib.packages.openpyxl', 'tablib.packages.openpyxl.shared', 'tablib.packages.openpyxl.reader', diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..83c5a8e --- /dev/null +++ b/tox.ini @@ -0,0 +1,15 @@ +# Tox (http://tox.testrun.org/) is a tool for running tests +# in multiple virtualenvs. This configuration file will run the +# test suite on all supported python versions. To use it, "pip install tox" +# and then run "tox" from this directory. 
+ +[tox] +envlist = py26, py27, py32, pypy + +[testenv] +commands = python setup.py test +deps = + pytest + PyYAML + xlrd + omnijson From 6a825a8a3965066a71ebf6719498fbd77d7fd912 Mon Sep 17 00:00:00 2001 From: Marc Abramowitz Date: Tue, 15 May 2012 07:18:15 -0700 Subject: [PATCH 3/5] NOTICE: Add license info for xlrd3 and xlwt3 --- NOTICE | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/NOTICE b/NOTICE index 9a3d9bd..f79efa3 100644 --- a/NOTICE +++ b/NOTICE @@ -1,5 +1,5 @@ Tablib includes some vendorized python libraries: ordereddict, odfpy, pyyaml, -simplejson, unicodecsv, and xlwt. +simplejson, unicodecsv, xlrd, xlrd3, xlwt, and xlwt3. Markup License ============== @@ -299,8 +299,8 @@ or implied, of Jeremy Dunck. -XLWT License -============ +XLWT (and XLWT3) License +======================== Portions copyright © 2007, Stephen John Machin, Lingfo Pty Ltd All rights reserved. @@ -384,8 +384,8 @@ Subject: pyExcelerator -XLRD License -============ +XLRD (and XLRD3) License +======================== Portions copyright © 2005-2009, Stephen John Machin, Lingfo Pty Ltd All rights reserved. @@ -456,4 +456,4 @@ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file +OF THE POSSIBILITY OF SUCH DAMAGE. 
From a3781e3c896071e49d7cfbac7f24d3f0c5336be9 Mon Sep 17 00:00:00 2001 From: Marc Abramowitz Date: Tue, 15 May 2012 07:19:15 -0700 Subject: [PATCH 4/5] Changes for Python 3 compatibility, including vendorizing xlrd3 --- setup.py | 7 + tablib/compat.py | 5 +- tablib/core.py | 14 +- tablib/formats/_xls.py | 4 +- tablib/packages/xlrd3/__init__.py | 1642 +++++++++++++++++++++ tablib/packages/xlrd3/biffh.py | 620 ++++++++ tablib/packages/xlrd3/compdoc.py | 346 +++++ tablib/packages/xlrd3/formatting.py | 1186 +++++++++++++++ tablib/packages/xlrd3/formula.py | 2083 +++++++++++++++++++++++++++ tablib/packages/xlrd3/sheet.py | 1611 +++++++++++++++++++++ tablib/packages/xlrd3/xfcell.py | 276 ++++ tablib/packages/xlrd3/xfconst.py | 84 ++ tablib/packages/xlrd3/xldate.py | 167 +++ test_tablib.py | 2 +- 14 files changed, 8036 insertions(+), 11 deletions(-) create mode 100644 tablib/packages/xlrd3/__init__.py create mode 100644 tablib/packages/xlrd3/biffh.py create mode 100644 tablib/packages/xlrd3/compdoc.py create mode 100644 tablib/packages/xlrd3/formatting.py create mode 100644 tablib/packages/xlrd3/formula.py create mode 100644 tablib/packages/xlrd3/sheet.py create mode 100644 tablib/packages/xlrd3/xfcell.py create mode 100644 tablib/packages/xlrd3/xfconst.py create mode 100644 tablib/packages/xlrd3/xldate.py diff --git a/setup.py b/setup.py index cd6e39c..6b4c603 100755 --- a/setup.py +++ b/setup.py @@ -49,13 +49,20 @@ setup( 'tablib', 'tablib.formats', 'tablib.packages', 'tablib.packages.xlwt', + 'tablib.packages.xlwt3', 'tablib.packages.xlrd', + 'tablib.packages.xlrd3', 'tablib.packages.omnijson', 'tablib.packages.odf', + 'tablib.packages.odf3', 'tablib.packages.openpyxl', 'tablib.packages.openpyxl.shared', 'tablib.packages.openpyxl.reader', 'tablib.packages.openpyxl.writer', + 'tablib.packages.openpyxl3', + 'tablib.packages.openpyxl3.shared', + 'tablib.packages.openpyxl3.reader', + 'tablib.packages.openpyxl3.writer', 'tablib.packages.yaml', 'tablib.packages.unicodecsv' 
], diff --git a/tablib/compat.py b/tablib/compat.py index f6bcf8d..919f464 100644 --- a/tablib/compat.py +++ b/tablib/compat.py @@ -23,6 +23,8 @@ except ImportError: if is_py3: from io import BytesIO import tablib.packages.xlwt3 as xlwt + import tablib.packages.xlrd3 as xlrd + from tablib.packages.xlrd3.biffh import XLRDError from tablib.packages import markup3 as markup from tablib.packages import openpyxl3 as openpyxl from tablib.packages.odf3 import opendocument, style, text, table @@ -40,6 +42,7 @@ else: from cStringIO import StringIO import tablib.packages.xlwt as xlwt import tablib.packages.xlrd as xlrd + from tablib.packages.xlrd.biffh import XLRDError from tablib.packages import markup from itertools import ifilter from tablib.packages import openpyxl @@ -47,4 +50,4 @@ else: from tablib.packages import unicodecsv as csv - unicode = unicode \ No newline at end of file + unicode = unicode diff --git a/tablib/core.py b/tablib/core.py index c52671e..b1f0aca 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -226,19 +226,21 @@ class Dataset(object): def __unicode__(self): result = [self.__headers] - result.extend(map(unicode, row) for row in self._data) + result.extend(list(map(unicode, row)) for row in self._data) # here, we calculate max width for each column - lens = (map(len, row) for row in result) - field_lens = map(max, zip(*lens)) + lens = (list(map(len, row)) for row in result) + field_lens = list(map(max, zip(*lens))) # delimiter between header and data - result.insert(1, [u'-' * length for length in field_lens]) + result.insert(1, ['-' * length for length in field_lens]) - format_string = u'|'.join(u'{%s:%s}' % item for item in enumerate(field_lens)) + format_string = '|'.join('{%s:%s}' % item for item in enumerate(field_lens)) - return u'\n'.join(format_string.format(*row) for row in result) + return '\n'.join(format_string.format(*row) for row in result) + def __str__(self): + return self.__unicode__() # --------- diff --git 
a/tablib/formats/_xls.py b/tablib/formats/_xls.py index 6530d91..15bef75 100644 --- a/tablib/formats/_xls.py +++ b/tablib/formats/_xls.py @@ -5,9 +5,7 @@ import sys -from tablib.compat import BytesIO, xlwt -from tablib.packages import xlrd -from tablib.packages.xlrd.biffh import XLRDError +from tablib.compat import BytesIO, xlwt, xlrd, XLRDError import tablib title = 'xls' diff --git a/tablib/packages/xlrd3/__init__.py b/tablib/packages/xlrd3/__init__.py new file mode 100644 index 0000000..98f322b --- /dev/null +++ b/tablib/packages/xlrd3/__init__.py @@ -0,0 +1,1642 @@ +# Copyright © 2005-2009 Stephen John Machin, Lingfo Pty Ltd +# This module is part of the xlrd3 package, which is released under a +# BSD-style licence. +# +# xlrd3, the Python 3 port of xlrd v0.7.1 +# +# A Python module for extracting data from MS Excel spreadsheet files. +# +# General information +# +# Acknowledgements +# +# Development of this module would not have been possible without the document +# "OpenOffice.org's Documentation of the Microsoft Excel File Format" +# ("OOo docs" for short). +# The latest version is available from OpenOffice.org in +# http://sc.openoffice.org/excelfileformat.pdf PDF format +# and +# http://sc.openoffice.org/excelfileformat.odt ODT format. +# Small portions of the OOo docs are reproduced in this +# document. A study of the OOo docs is recommended for those who wish a +# deeper understanding of the Excel file layout than the xlrd docs can provide. +# +# Provision of formatting information in version 0.6.1 was funded by +# http://www.simplistix.co.uk Simplistix Ltd. +# +# Unicode +# +# This module presents all text strings as Python unicode objects. +# From Excel 97 onwards, text in Excel spreadsheets has been stored as Unicode. 
+# Older files (Excel 95 and earlier) don't keep strings in Unicode; +# a CODEPAGE record provides a codepage number (for example, 1252) which is +# used by xlrd to derive the encoding (for the same example: "cp1252") which is +# used to translate to Unicode. +# +# If the CODEPAGE record is missing (possible if the file was created +# by third-party software), xlrd will assume that the encoding is ascii, and keep going. +# If the actual encoding is not ascii, a UnicodeDecodeError exception will be raised and +# you will need to determine the encoding yourself, and tell xlrd:: +# +# book = xlrd.open_workbook(..., encoding_override="cp1252") +# +# If the CODEPAGE record exists but is wrong (for example, the codepage +# number is 1251, but the strings are actually encoded in koi8_r), +# it can be overridden using the same mechanism. +# The supplied runxlrd.py has a corresponding command-line argument, which +# may be used for experimentation:: +# +# runxlrd.py -e koi8_r 3rows myfile.xls +# +# The first place to look for an encoding ("codec name") is +# http://docs.python.org/lib/standard-encodings.html +# the Python documentation. +# +# Dates in Excel spreadsheets +# +# In reality, there are no such things. What you have are floating point +# numbers and pious hope. +# There are several problems with Excel dates: +# +# (1) Dates are not stored as a separate data type; they are stored as +# floating point numbers and you have to rely on +# (a) the "number format" applied to them in Excel and/or +# (b) knowing which cells are supposed to have dates in them. +# This module helps with (a) by inspecting the +# format that has been applied to each number cell; +# if it appears to be a date format, the cell +# is classified as a date rather than a number. Feedback on this feature, +# especially from non-English-speaking locales, would be appreciated. +# +# (2) Excel for Windows stores dates by default as the number of +# days (or fraction thereof) since 1899-12-31T00:00:00.
Excel for +# Macintosh uses a default start date of 1904-01-01T00:00:00. The date +# system can be changed in Excel on a per-workbook basis (for example: +# Tools -> Options -> Calculation, tick the "1904 date system" box). +# This is of course a bad idea if there are already dates in the +# workbook. There is no good reason to change it even if there are no +# dates in the workbook. Which date system is in use is recorded in the +# workbook. A workbook transported from Windows to Macintosh (or vice +# versa) will work correctly with the host Excel. When using this +# module's xldate_as_tuple function to convert numbers from a workbook, +# you must use the datemode attribute of the Book object. If you guess, +# or make a judgement depending on where you believe the workbook was +# created, you run the risk of being 1462 days out of kilter. +# +# Reference: +# http://support.microsoft.com/default.aspx?scid=KB;EN-US;q180162 +# +# (3) The Excel implementation of the Windows-default 1900-based date system works on the +# incorrect premise that 1900 was a leap year. It interprets the number 60 as meaning 1900-02-29, +# which is not a valid date. Consequently any number less than 61 is ambiguous. Example: is 59 the +# result of 1900-02-28 entered directly, or is it 1900-03-01 minus 2 days? The OpenOffice.org Calc +# program "corrects" the Microsoft problem; entering 1900-02-27 causes the number 59 to be stored. +# Save as an XLS file, then open the file with Excel -- you'll see 1900-02-28 displayed. +# +# Reference: http://support.microsoft.com/default.aspx?scid=kb;en-us;214326 +# +# (4) The Macintosh-default 1904-based date system counts 1904-01-02 as day 1 and 1904-01-01 as day zero. +# Thus any number such that (0.0 <= number < 1.0) is ambiguous. Is 0.625 a time of day (15:00:00), +# independent of the calendar, +# or should it be interpreted as an instant on a particular day (1904-01-01T15:00:00)? 
+# The xldate_* functions in this module +# take the view that such a number is a calendar-independent time of day (like Python's datetime.time type) for both +# date systems. This is consistent with more recent Microsoft documentation +# (for example, the help file for Excel 2002 which says that the first day +# in the 1904 date system is 1904-01-02). +# +# (5) Usage of the Excel DATE() function may leave strange dates in a spreadsheet. Quoting the help file, +# in respect of the 1900 date system: "If year is between 0 (zero) and 1899 (inclusive), +# Excel adds that value to 1900 to calculate the year. For example, DATE(108,1,2) returns January 2, 2008 (1900+108)." +# This gimmick, semi-defensible only for arguments up to 99 and only in the pre-Y2K-awareness era, +# means that DATE(1899, 12, 31) is interpreted as 3799-12-31. +# +# For further information, please refer to the documentation for the xldate_* functions. +# +# Named references, constants, formulas, and macros +# +# A name is used to refer to a cell, a group of cells, a constant +# value, a formula, or a macro. Usually the scope of a name is global +# across the whole workbook. However it can be local to a worksheet. +# For example, if the sales figures are in different cells in +# different sheets, the user may define the name "Sales" in each +# sheet. There are built-in names, like "Print_Area" and +# "Print_Titles"; these two are naturally local to a sheet. +# +# To inspect the names with a user interface like MS Excel, OOo Calc, +# or Gnumeric, click on Insert/Names/Define. This will show the global +# names, plus those local to the currently selected sheet. +# +# A Book object provides two dictionaries (name_map and +# name_and_scope_map) and a list (name_obj_list) which allow various +# ways of accessing the Name objects. There is one Name object for +# each NAME record found in the workbook. Name objects have many +# attributes, several of which are relevant only when obj.macro is 1. 
+# +# In the examples directory you will find namesdemo.xls which +# showcases the many different ways that names can be used, and +# xlrdnamesAPIdemo.py which offers 3 different queries for inspecting +# the names in your files, and shows how to extract whatever a name is +# referring to. There is currently one "convenience method", +# Name.cell(), which extracts the value in the case where the name +# refers to a single cell. More convenience methods are planned. The +# source code for Name.cell (in __init__.py) is an extra source of +# information on how the Name attributes hang together. +# +# Name information is **not** extracted from files older than +# Excel 5.0 (Book.biff_version < 50) +# +# Formatting +# +# Introduction +# +# This collection of features, new in xlrd version 0.6.1, is intended +# to provide the information needed to (1) display/render spreadsheet contents +# (say) on a screen or in a PDF file, and (2) copy spreadsheet data to another +# file without losing the ability to display/render it. +# +# The Palette; Colour Indexes +# +# A colour is represented in Excel as a (red, green, blue) ("RGB") tuple +# with each component in range(256). However it is not possible to access an +# unlimited number of colours; each spreadsheet is limited to a palette of 64 different +# colours (24 in Excel 3.0 and 4.0, 8 in Excel 2.0). Colours are referenced by an index +# ("colour index") into this palette. +# +# Colour indexes 0 to 7 represent 8 fixed built-in colours: black, white, red, green, blue, +# yellow, magenta, and cyan. +# +# The remaining colours in the palette (8 to 63 in Excel 5.0 and later) +# can be changed by the user. In the Excel 2003 UI, Tools/Options/Color presents a palette +# of 7 rows of 8 colours. The last two rows are reserved for use in charts. +# The correspondence between this grid and the assigned +# colour indexes is NOT left-to-right top-to-bottom. 
+# Indexes 8 to 15 correspond to changeable +# parallels of the 8 fixed colours -- for example, index 7 is forever cyan; +# index 15 starts off being cyan but can be changed by the user. +# +# The default colour for each index depends on the file version; tables of the defaults +# are available in the source code. If the user changes one or more colours, +# a PALETTE record appears in the XLS file -- it gives the RGB values for *all* changeable +# indexes. +# Note that colours can be used in "number formats": "[CYAN]...." and "[COLOR8]...." refer +# to colour index 7; "[COLOR16]...." will produce cyan +# unless the user changes colour index 15 to something else. +# +# In addition, there are several "magic" colour indexes used by Excel: +# 0x18 (BIFF3-BIFF4), 0x40 (BIFF5-BIFF8): System window text colour for border lines +# (used in XF, CF, and WINDOW2 records) +# 0x19 (BIFF3-BIFF4), 0x41 (BIFF5-BIFF8): System window background colour for pattern background +# (used in XF and CF records ) +# 0x43: System face colour (dialogue background colour) +# 0x4D: System window text colour for chart border lines +# 0x4E: System window background colour for chart areas +# 0x4F: Automatic colour for chart border lines (seems to be always Black) +# 0x50: System ToolTip background colour (used in note objects) +# 0x51: System ToolTip text colour (used in note objects) +# 0x7FFF: System window text colour for fonts (used in FONT and CF records) +# Note 0x7FFF appears to be the *default* colour index. It appears quite often in FONT +# records. +# +# Default Formatting +# +# Default formatting is applied to all empty cells (those not described by a cell record). +# Firstly row default information (ROW record, Rowinfo class) is used if available. +# Failing that, column default information (COLINFO record, Colinfo class) is used if available. 
+# As a last resort the worksheet/workbook default cell format will be used; this +# should always be present in an Excel file, +# described by the XF record with the fixed index 15 (0-based). By default, it uses the +# worksheet/workbook default cell style, described by the very first XF record (index 0). +# +# Formatting features not included in xlrd version 0.6.1 +# +# - Rich text i.e. strings containing partial bold, italic +# and underlined text, change of font inside a string, etc. +# See OOo docs s3.4 and s3.2 +# - Asian phonetic text (known as "ruby"), used for Japanese furigana. See OOo docs +# s3.4.2 (p15) +# - Conditional formatting. See OOo docs +# s5.12, s6.21 (CONDFMT record), s6.16 (CF record) +# - Miscellaneous sheet-level and book-level items e.g. printing layout, screen panes. +# - Modern Excel file versions don't keep most of the built-in +# "number formats" in the file; Excel loads formats according to the +# user's locale. Currently xlrd's emulation of this is limited to +# a hard-wired table that applies to the US English locale. This may mean +# that currency symbols, date order, thousands separator, decimals separator, etc +# are inappropriate. Note that this does not affect users who are copying XLS +# files, only those who are visually rendering cells. +# +# Loading worksheets on demand +# +# This feature, new in version 0.7.1, is governed by the on_demand argument +# to the open_workbook() function and allows saving memory and time by loading +# only those sheets that the caller is interested in, and releasing sheets +# when no longer required. +# +# on_demand=False (default): No change. open_workbook() loads global data +# and all sheets, releases resources no longer required (principally the +# str or mmap object containing the Workbook stream), and returns. +# +# on_demand=True and BIFF version < 5.0: A warning message is emitted, +# on_demand is recorded as False, and the old process is followed. 
+# +# on_demand=True and BIFF version >= 5.0: open_workbook() loads global +# data and returns without releasing resources. At this stage, the only +# information available about sheets is Book.nsheets and Book.sheet_names(). +# +# Book.sheet_by_name() and Book.sheet_by_index() will load the requested +# sheet if it is not already loaded. +# +# Book.sheets() will load all/any unloaded sheets. +# +# The caller may save memory by calling +# Book.unload_sheet(sheet_name_or_index) when finished with the sheet. +# This applies irrespective of the state of on_demand. +# +# The caller may re-load an unloaded sheet by calling Book.sheet_by_xxxx() +# -- except if those required resources have been released (which will +# have happened automatically when on_demand is false). This is the only +# case where an exception will be raised. +# +# The caller may query the state of a sheet: +# Book.sheet_loaded(sheet_name_or_index) -> a bool +# +# 2010-12-03 mozman start xlrd3, for changes see NEWS.txt +# +# 2009-04-27 SJM Integrated on_demand patch by Armando Serrano Lombillo +# 2008-11-23 SJM Support dumping FILEPASS and EXTERNNAME records; extra info from SUPBOOK records +# 2008-11-23 SJM colname utility function now supports more than 256 columns +# 2008-04-24 SJM Recovery code for file with out-of-order/missing/wrong CODEPAGE record needed to be called for EXTERNSHEET/BOUNDSHEET/NAME/SHEETHDR records. +# 2008-02-08 SJM Preparation for Excel 2.0 support +# 2008-02-03 SJM Minor tweaks for IronPython support +# 2008-02-02 SJM Previous change stopped dump() and count_records() ... fixed +# 2007-12-25 SJM Decouple Book initialisation & loading -- to allow for multiple loaders. +# 2007-12-20 SJM Better error message for unsupported file format. +# 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files. 
+# 2007-11-20 SJM Wasn't handling EXTERNSHEET record that needed CONTINUE record(s) +# 2007-07-07 SJM Version changed to 0.7.0 (alpha 1) +# 2007-07-07 SJM Logfile arg wasn't being passed from open_workbook to compdoc.CompDoc +# 2007-05-21 SJM If no CODEPAGE record in pre-8.0 file, assume ascii and keep going. +# 2007-04-22 SJM Removed antique undocumented Book.get_name_dict method. + +import sys +import time +from struct import unpack +import mmap + +from . import sheet +from . import compdoc +from . import formatting +from .biffh import * +from .xldate import xldate_as_tuple, XLDateError +from .formula import * +from .xfcell import XFCell + +empty_cell = sheet.empty_cell # for exposure to the world ... + +DEBUG = False +USE_FANCY_CD = 1 + +MY_EOF = 0xF00BAAA # not a 16-bit number + +SUPBOOK_UNK = 0 +SUPBOOK_INTERNAL = 1 +SUPBOOK_EXTERNAL = 2 +SUPBOOK_ADDIN = 3 +SUPBOOK_DDEOLE = 4 + +SUPPORTED_VERSIONS = (80, 70, 50, 45, 40, 30, 21, 20) + +code_from_builtin_name = { + "Consolidate_Area": "\x00", + "Auto_Open": "\x01", + "Auto_Close": "\x02", + "Extract": "\x03", + "Database": "\x04", + "Criteria": "\x05", + "Print_Area": "\x06", + "Print_Titles": "\x07", + "Recorder": "\x08", + "Data_Form": "\x09", + "Auto_Activate": "\x0A", + "Auto_Deactivate": "\x0B", + "Sheet_Title": "\x0C", + "_FilterDatabase": "\x0D", + } + +builtin_name_from_code = {} +for _bin, _bic in code_from_builtin_name.items(): + builtin_name_from_code[_bic] = _bin + +# +# Open a spreadsheet file for data extraction. +# +# @param filename The path to the spreadsheet file to be opened. +# +# @param logfile An open file to which messages and diagnostics are written. +# +# @param verbosity Increases the volume of trace material written to the logfile. +# +# @param pickleable Default is true. In Python 2.4 or earlier, setting to false +# will cause use of array.array objects which save some memory but can't be pickled. +# In Python 2.5, array.arrays are used unconditionally. 
Note: if you have large files that +# you need to read multiple times, it can be much faster to cPickle.dump() the xlrd.Book object +# once, and use cPickle.load() multiple times. +# @param use_mmap Whether to use the mmap module is determined heuristically. +# Use this arg to override the result. Current heuristic: mmap is used if it exists. +# +# @param file_contents ... as a string or an mmap.mmap object or some other behave-alike object. +# If file_contents is supplied, filename will not be used, except (possibly) in messages. +# +# @param encoding_override Used to overcome missing or bad codepage information +# in older-version files. Refer to discussion in the Unicode section above. +# - New in version 0.6.0 +# +# @param formatting_info Governs provision of a reference to an XF (eXtended Format) object +# for each cell in the worksheet. +# Default is False. This is backwards compatible and saves memory. +# "Blank" cells (those with their own formatting information but no data) are treated as empty +# (by ignoring the file's BLANK and MULBLANK records). +# It cuts off any bottom "margin" of rows of empty (and blank) cells and +# any right "margin" of columns of empty (and blank) cells. +# Only cell_value and cell_type are available. +# `True` provides all cells, including empty and blank cells. +# XF information is available for each cell. +# - New in version 0.6.1 +# +# @param on_demand Governs whether sheets are all loaded initially or when demanded +# by the caller. Please refer back to the section "Loading worksheets on demand" for details. +# - New in version 0.7.1 +# +# @return An instance of the Book class. 
+ +def open_workbook(filename=None, + logfile=sys.stdout, + verbosity=0, + pickleable=True, + use_mmap=True, + file_contents=None, + encoding_override=None, + formatting_info=False, + on_demand=False): + t0 = time.clock() + workbook = Book() + workbook.biff2_8_load(filename=filename, + file_contents=file_contents, + logfile=logfile, + verbosity=verbosity, + pickleable=pickleable, + use_mmap=use_mmap, + encoding_override=encoding_override, + formatting_info=formatting_info, + on_demand=on_demand) + t1 = time.clock() + workbook.load_time_stage_1 = t1 - t0 + biff_version = workbook.getbof(XL_WORKBOOK_GLOBALS) + if not biff_version: + raise XLRDError("Can't determine file's BIFF version") + + if biff_version not in SUPPORTED_VERSIONS: + raise XLRDError( "BIFF version %s is not supported" % \ + biff_text_from_num[biff_version]) + + workbook.biff_version = biff_version + + if biff_version <= 40: + # no workbook globals, only 1 worksheet + if on_demand: + fprintf(workbook.logfile, + "*** WARNING: on_demand is not supported for this Excel version.\n" \ + "*** Setting on_demand to False.\n") + workbook.on_demand = on_demand = False + workbook.fake_globals_get_sheet() + elif biff_version == 45: + # worksheet(s) embedded in global stream + workbook.parse_globals() + if on_demand: + fprintf(workbook.logfile, + "*** WARNING: on_demand is not supported for this Excel version.\n" \ + "*** Setting on_demand to False.\n") + workbook.on_demand = on_demand = False + else: + workbook.parse_globals() + workbook._sheet_list = [None for sh in workbook._sheet_names] + if not on_demand: + workbook.get_sheets() + + workbook.nsheets = len(workbook._sheet_list) + if biff_version == 45 and workbook.nsheets > 1: + fprintf(workbook.logfile, + "*** WARNING: Excel 4.0 workbook (.XLW) file contains %d worksheets.\n" \ + "*** Book-level data will be that of the last worksheet.\n", + workbook.nsheets) + + if not on_demand: + workbook.release_resources() + t2 = time.clock() + 
workbook.load_time_stage_2 = t2 - t1 + return workbook + +# For debugging: dump the file's BIFF records in char & hex. +# @param filename The path to the file to be dumped. +# @param outfile An open file, to which the dump is written. +# @param unnumbered If true, omit offsets (for meaningful diffs). + +def dump(filename, outfile=sys.stdout, unnumbered=False): + workbook = Book() + workbook.biff2_8_load(filename=filename, logfile=outfile, ) + biff_dump(workbook.mem, workbook.base, workbook.stream_len, 0, outfile, unnumbered) + +# For debugging and analysis: summarise the file's BIFF records. +# I.e. produce a sorted file of (record_name, count). +# @param filename The path to the file to be summarised. +# @param outfile An open file, to which the summary is written. + +def count_records(filename, outfile=sys.stdout): + workbook = Book() + workbook.biff2_8_load(filename=filename, logfile=outfile, ) + biff_count_records(workbook.mem, workbook.base, workbook.stream_len, outfile) + +# Information relating to a named reference, formula, macro, etc. +# - New in version 0.6.0 +# - Name information is **not** extracted from files older than +# Excel 5.0 (Book.biff_version < 50) + +class Name(BaseObject): + + _repr_these = ['stack'] + book = None # parent + + # 0 = Visible; 1 = Hidden + hidden = 0 + + # 0 = Command macro; 1 = Function macro. Relevant only if macro == 1 + func = 0 + + # 0 = Sheet macro; 1 = VisualBasic macro. Relevant only if macro == 1 + vbasic = 0 + + # 0 = Standard name; 1 = Macro name + macro = 0 + + # 0 = Simple formula; 1 = Complex formula (array formula or user defined) + # ´No examples have been sighted.´ + complex = 0 + + # 0 = User-defined name; 1 = Built-in name + # (common examples: Print_Area, Print_Titles; see OOo docs for full list) + builtin = 0 + + # Function group. Relevant only if macro == 1; see OOo docs for values. 
+ funcgroup = 0 + + # 0 = Formula definition; 1 = Binary data ´No examples have been sighted.´ + binary = 0 + + # The index of this object in book.name_obj_list + name_index = 0 + + # A Unicode string. If builtin, decoded as per OOo docs. + name = "" + + # An 8-bit string. + raw_formula = "" + + # -1: The name is global (visible in all calculation sheets). + # -2: The name belongs to a macro sheet or VBA sheet. + # -3: The name is invalid. + # 0 <= scope < book.nsheets: The name is local to the sheet whose index is scope. + scope = -1 + + # The result of evaluating the formula, if any. + # If no formula, or evaluation of the formula encountered problems, + # the result is None. Otherwise the result is a single instance of the + # Operand class. + result = None + + # This is a convenience method for the frequent use case where the name + # refers to a single cell. + # @return An instance of the Cell class. + # @throws XLRDError The name is not a constant absolute reference + # to a single cell. + def cell(self): + res = self.result + if res: + # result should be an instance of the Operand class + kind = res.kind + value = res.value + if kind == oREF and len(value) == 1: + ref3d = value[0] + if (0 <= ref3d.shtxlo == ref3d.shtxhi - 1 + and ref3d.rowxlo == ref3d.rowxhi - 1 + and ref3d.colxlo == ref3d.colxhi - 1): + sh = self.book.sheet_by_index(ref3d.shtxlo) + return sh.cell(ref3d.rowxlo, ref3d.colxlo) + self.dump(self.book.logfile, + header="=== Dump of Name object ===", + footer="======= End of dump =======") + raise XLRDError("Not a constant absolute reference to a single cell") + + # This is a convenience method for the use case where the name + # refers to one rectangular area in one worksheet. 
+ # @param clipped If true (the default), the returned rectangle is clipped + # to fit in (0, sheet.nrows, 0, sheet.ncols) -- it is guaranteed that + # 0 <= rowxlo <= rowxhi <= sheet.nrows and that the number of usable rows + # in the area (which may be zero) is rowxhi - rowxlo; likewise for columns. + # @return a tuple (sheet_object, rowxlo, rowxhi, colxlo, colxhi). + # @throws XLRDError The name is not a constant absolute reference + # to a single area in a single sheet. + def area2d(self, clipped=True): + res = self.result + if res: + # result should be an instance of the Operand class + kind = res.kind + value = res.value + if kind == oREF and len(value) == 1: # only 1 reference + ref3d = value[0] + if 0 <= ref3d.shtxlo == ref3d.shtxhi - 1: # only 1 usable sheet + sh = self.book.sheet_by_index(ref3d.shtxlo) + if not clipped: + return sh, ref3d.rowxlo, ref3d.rowxhi, ref3d.colxlo, ref3d.colxhi + rowxlo = min(ref3d.rowxlo, sh.nrows) + rowxhi = max(rowxlo, min(ref3d.rowxhi, sh.nrows)) + colxlo = min(ref3d.colxlo, sh.ncols) + colxhi = max(colxlo, min(ref3d.colxhi, sh.ncols)) + assert 0 <= rowxlo <= rowxhi <= sh.nrows + assert 0 <= colxlo <= colxhi <= sh.ncols + return sh, rowxlo, rowxhi, colxlo, colxhi + self.dump(self.book.logfile, + header="=== Dump of Name object ===", + footer="======= End of dump =======", + ) + raise XLRDError("Not a constant absolute reference to a single area in a single sheet") + +# Contents of a "workbook". +# WARNING: You don't call this class yourself. You use the Book object that +# was returned when you called xlrd.open_workbook("myfile.xls"). + +class Book(BaseObject): + + # The number of worksheets present in the workbook file. + # This information is available even when no sheets have yet been loaded. + nsheets = 0 + + # Which date system was in force when this file was last saved. + # 0 => 1900 system (the Excel for Windows default). + # 1 => 1904 system (the Excel for Macintosh default). 
+ datemode = 0 # In case it's not specified in the file. + + # Version of BIFF (Binary Interchange File Format) used to create the file. + # Latest is 8.0 (represented here as 80), introduced with Excel 97. + # Earliest supported by this module: 2.0 (represented as 20). + biff_version = 0 + + # List containing a Name object for each NAME record in the workbook. + # - New in version 0.6.0 + name_obj_list = [] + + # An integer denoting the character set used for strings in this file. + # For BIFF 8 and later, this will be 1200, meaning Unicode; more precisely, UTF_16_LE. + # For earlier versions, this is used to derive the appropriate Python encoding + # to be used to convert to Unicode. + # Examples: 1252 -> 'cp1252', 10000 -> 'mac_roman' + codepage = None + + # The encoding that was derived from the codepage. + encoding = None + + # A tuple containing the (telephone system) country code for: + # [0]: the user-interface setting when the file was created. + # [1]: the regional settings. + # Example: (1, 61) meaning (USA, Australia). + # This information may give a clue to the correct encoding for an unknown codepage. + # For a long list of observed values, refer to the OpenOffice.org documentation for + # the COUNTRY record. + countries = (0, 0) + + # What (if anything) is recorded as the name of the last user to save the file. + user_name = '' + + # A list of Font class instances, each corresponding to a FONT record. + # - New in version 0.6.1 + font_list = [] + + # A list of XF class instances, each corresponding to an XF record. + # - New in version 0.6.1 + xf_list = [] + + # A list of Format objects, each corresponding to a FORMAT record, in + # the order that they appear in the input file. + # It does not contain builtin formats. + # If you are creating an output file using (for example) pyExcelerator, + # use this list. + # The collection to be used for all visual rendering purposes is format_map. 
+ # - New in version 0.6.1 + format_list = [] + + # The mapping from XF.format_key to Format object. + # - New in version 0.6.1 + format_map = {} + + # This provides access via name to the extended format information for + # both built-in styles and user-defined styles. + # It maps name to (built_in, xf_index), where: + # name is either the name of a user-defined style, + # or the name of one of the built-in styles. Known built-in names are + # Normal, RowLevel_1 to RowLevel_7, + # ColLevel_1 to ColLevel_7, Comma, Currency, Percent, "Comma [0]", + # "Currency [0]", Hyperlink, and "Followed Hyperlink". + # ´built_in´ 1 = built-in style, 0 = user-defined + # ´xf_index´ is an index into Book.xf_list. + # References: OOo docs s6.99 (STYLE record); Excel UI Format/Style + # - New in version 0.6.1 + style_name_map = {} + + # This provides definitions for colour indexes. Please refer to the + # above section "The Palette; Colour Indexes" for an explanation + # of how colours are represented in Excel. + # Colour indexes into the palette map into (red, green, blue) tuples. + # "Magic" indexes e.g. 0x7FFF map to None. + # ´colour_map´ is what you need if you want to render cells on screen or in a PDF + # file. If you are writing an output XLS file, use palette_record. + # - New in version 0.6.1. Extracted only if open_workbook(..., formatting_info=True) + colour_map = {} + + # If the user has changed any of the colours in the standard palette, the XLS + # file will contain a PALETTE record with 56 (16 for Excel 4.0 and earlier) + # RGB values in it, and this list will be e.g. [(r0, b0, g0), ..., (r55, b55, g55)]. + # Otherwise this list will be empty. This is what you need if you are + # writing an output XLS file. If you want to render cells on screen or in a PDF + # file, use colour_map. + # - New in version 0.6.1. 
Extracted only if open_workbook(..., formatting_info=True) + palette_record = [] + + # Time in seconds to extract the XLS image as a contiguous string (or mmap equivalent). + load_time_stage_1 = -1.0 + + # Time in seconds to parse the data from the contiguous string (or mmap equivalent). + load_time_stage_2 = -1.0 + + # @return A list of all sheets in the book. + # All sheets not already loaded will be loaded. + def sheets(self): + for sheetx in range(self.nsheets): + if not self._sheet_list[sheetx]: + self.get_sheet(sheetx) + return self._sheet_list[:] + + # @param sheetx Sheet index in range(nsheets) + # @return An object of the Sheet class + def sheet_by_index(self, sheetx): + return self._sheet_list[sheetx] or self.get_sheet(sheetx) + + # @param sheet_name Name of sheet required + # @return An object of the Sheet class + def sheet_by_name(self, sheet_name): + try: + sheetx = self._sheet_names.index(sheet_name) + except ValueError: + raise XLRDError('No sheet named <%r>' % sheet_name) + return self.sheet_by_index(sheetx) + + # @return A list of the names of all the worksheets in the workbook file. + # This information is available even when no sheets have yet been loaded. + def sheet_names(self): + return self._sheet_names[:] + + # @param sheet_name_or_index Name or index of sheet enquired upon + # @return true if sheet is loaded, false otherwise + # -- New in version 0.7.1 + def sheet_loaded(self, sheet_name_or_index): + # using type(1) because int won't work with Python 2.1 + if isinstance(sheet_name_or_index, type(1)): + sheetx = sheet_name_or_index + else: + try: + sheetx = self._sheet_names.index(sheet_name_or_index) + except ValueError: + raise XLRDError('No sheet named <%r>' % sheet_name_or_index) + return self._sheet_list[sheetx] and True or False # Python 2.1 again + + # @param sheet_name_or_index Name or index of sheet to be unloaded. 
+ # - New in version 0.7.1 + def unload_sheet(self, sheet_name_or_index): + # using type(1) because int won't work with Python 2.1 + if isinstance(sheet_name_or_index, type(1)): + sheetx = sheet_name_or_index + else: + try: + sheetx = self._sheet_names.index(sheet_name_or_index) + except ValueError: + raise XLRDError('No sheet named <%r>' % sheet_name_or_index) + self._sheet_list[sheetx] = None + + # A mapping from (lower_case_name, scope) to a single Name object. + # - New in version 0.6.0 + name_and_scope_map = {} + + # A mapping from lower_case_name to a list of Name objects. The list is + # sorted in scope order. Typically there will be one item (of global scope) + # in the list. + # - New in version 0.6.0 + name_map = {} + + def __init__(self): + self._sheet_list = [] + self._sheet_names = [] + self._sheet_visibility = [] # from BOUNDSHEET record + self.nsheets = 0 + self._sh_abs_posn = [] # sheet's absolute position in the stream + self._sharedstrings = [] + self.raw_user_name = False + self._sheethdr_count = 0 # BIFF 4W only + self.builtinfmtcount = -1 # unknown as yet. 
BIFF 3, 4S, 4W + self.initialise_format_info() + self._all_sheets_count = 0 # includes macro & VBA sheets + self._supbook_count = 0 + self._supbook_locals_inx = None + self._supbook_addins_inx = None + + # maps an all_sheets index to a calc-sheets index (or -1) + self._all_sheets_map = [] + self._externsheet_info = [] + self._externsheet_type_b57 = [] + self._extnsht_name_from_num = {} + self._sheet_num_from_name = {} + self._extnsht_count = 0 + self._supbook_types = [] + self._resources_released = 0 + self.addin_func_names = [] + self.name_obj_list = [] + self.colour_map = {} + self.palette_record = [] + self.xf_list = [] + self.style_name_map = {} + + def biff2_8_load(self, filename=None, + file_contents=None, + logfile=sys.stdout, + verbosity=0, + pickleable=True, + use_mmap=True, + encoding_override=None, + formatting_info=False, + on_demand=False): + + self.logfile = logfile + self.verbosity = verbosity + self.pickleable = pickleable + self.use_mmap = use_mmap + self.encoding_override = encoding_override + self.formatting_info = formatting_info + self.on_demand = on_demand + + need_close_filestr = 0 + if not file_contents: + if self.use_mmap: + open_mode = "r+b" + else: + open_mode = "rb" + retry = False + try: + f = open(filename, open_mode) + except IOError: + e, v = sys.exc_info()[:2] + if open_mode == "r+b" and \ + (v.errno == 13 or v.strerror == "Permission denied"): + # Maybe the file is read-only + retry = True + self.use_mmap = False + else: + raise + if retry: + f = open(filename, "rb") + if self.use_mmap: + f.seek(0, 2) # EOF + size = f.tell() + f.seek(0, 0) # BOF + filestr = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ) + need_close_filestr = 1 + self.stream_len = size + else: + filestr = f.read() + self.stream_len = len(filestr) + f.close() + else: + filestr = file_contents + self.stream_len = len(file_contents) + + self.base = 0 + if filestr[:8] != compdoc.SIGNATURE: + # got this one at the antique store + self.mem = filestr + else: + cd = 
compdoc.CompDoc(filestr, logfile=self.logfile) + if USE_FANCY_CD: + for qname in ['Workbook', 'Book']: + self.mem, self.base, self.stream_len = cd.locate_named_stream(qname) + if self.mem: break + else: + raise XLRDError("Can't find workbook in OLE2 compound document") + else: + for qname in ['Workbook', 'Book']: + self.mem = cd.get_named_stream(qname) + if self.mem: break + else: + raise XLRDError("Can't find workbook in OLE2 compound document") + self.stream_len = len(self.mem) + del cd + if self.mem is not filestr: + if need_close_filestr: + filestr.close() + del filestr + self._position = self.base + if DEBUG: + print("mem: %s, base: %d, len: %d" % (type(self.mem), self.base, self.stream_len), file=self.logfile) + + def initialise_format_info(self): + # needs to be done once per sheet for BIFF 4W :-( + self.format_map = {} + self.format_list = [] + self.xfcount = 0 + self.actualfmtcount = 0 # number of FORMAT records seen so far + self._xf_index_to_xl_type_map = {} + self._xf_epilogue_done = 0 + self.xf_list = [] + self.font_list = [] + + def release_resources(self): + self._resources_released = 1 + del self.mem + del self._sharedstrings + + def get2bytes(self): + pos = self._position + buff_two = self.mem[pos:pos+2] + lenbuff = len(buff_two) + self._position += lenbuff + if lenbuff < 2: + return MY_EOF + lo, hi = buff_two + return (hi << 8) | lo #(to_py3): + + def get_record_parts(self): + pos = self._position + mem = self.mem + code, length = unpack('= 2: + fprintf(self.logfile, + "BOUNDSHEET: inx=%d vis=%r sheet_name=%r abs_posn=%d sheet_type=0x%02x\n", + self._all_sheets_count, visibility, sheet_name, abs_posn, sheet_type) + self._all_sheets_count += 1 + if sheet_type != XL_BOUNDSHEET_WORKSHEET: + self._all_sheets_map.append(-1) + descr = { + 1: 'Macro sheet', + 2: 'Chart', + 6: 'Visual Basic module', + }.get(sheet_type, 'UNKNOWN') + + fprintf(self.logfile, + "NOTE *** Ignoring non-worksheet data named %r (type 0x%02x = %s)\n", + sheet_name, sheet_type, 
descr) + else: + snum = len(self._sheet_names) + self._all_sheets_map.append(snum) + self._sheet_names.append(sheet_name) + self._sh_abs_posn.append(abs_posn) + self._sheet_visibility.append(visibility) + self._sheet_num_from_name[sheet_name] = snum + + def handle_builtinfmtcount(self, data): + # N.B. This count appears to be utterly useless. + builtinfmtcount = unpack('= 2: + fprintf(self.logfile, "*** No CODEPAGE record; assuming 1200 (utf_16_le)\n") + else: + codepage = self.codepage + if codepage in encoding_from_codepage: + encoding = encoding_from_codepage[codepage] + elif 300 <= codepage <= 1999: + encoding = 'cp' + str(codepage) + else: + encoding = 'unknown_codepage_' + str(codepage) + if DEBUG or (self.verbosity and encoding != self.encoding) : + fprintf(self.logfile, "CODEPAGE: codepage %r -> encoding %r\n", + codepage, encoding) + self.encoding = encoding + if self.codepage != 1200: # utf_16_le + # If we don't have a codec that can decode ASCII into Unicode, + # we're well & truly stuffed -- let the punter know ASAP. 
+ try: + _unused = str(b'trial', self.encoding) + except: + ei = sys.exc_info()[:2] + fprintf(self.logfile, + "ERROR *** codepage %r -> encoding %r -> %s: %s\n", + self.codepage, self.encoding, ei[0].__name__.split(".")[-1], ei[1]) + raise + if self.raw_user_name: + strg = unpack_string(self.user_name, 0, self.encoding, lenlen=1) + strg = strg.rstrip() + self.user_name = strg + self.raw_user_name = False + return self.encoding + + def handle_codepage(self, data): + codepage = unpack('= 2 + if self.biff_version >= 80: + option_flags, other_info =unpack("= 1 + verbose2 = DEBUG or self.verbosity >= 2 + if self.biff_version >= 80: + num_refs = unpack("= 2: + logf = self.logfile + fprintf(logf, "FILEPASS:\n") + hex_char_dump(data, 0, len(data), base=0, fout=logf) + if self.biff_version >= 80: + kind1, = unpack('= 2 + if self.biff_version < 50: + return + self.derive_encoding() + + # unpack + (option_flags, + kb_shortcut, + name_len, + fmla_len, + extsht_index, + sheet_index, + menu_text_len, + description_text_len, + help_topic_text_len, + status_bar_text_len) = unpack("> nshift) + + macro_flag = " M"[nobj.macro] + if self.biff_version < 80: + internal_name, pos = unpack_string_update_pos(data, 14, self.encoding, known_len=name_len) + else: + internal_name, pos = unpack_unicode_update_pos(data, 14, known_len=name_len) + nobj.extn_sheet_num = extsht_index + nobj.excel_sheet_index = sheet_index + nobj.scope = None # patched up in the names_epilogue() method + if verbose: + print("NAME[%d]:%s oflags=%d, name_len=%d, fmla_len=%d, extsht_index=%d, sheet_index=%d, name=%r" \ + % (name_index, macro_flag, option_flags, name_len, + fmla_len, extsht_index, sheet_index, internal_name)) + name = internal_name + if nobj.builtin: + name = builtin_name_from_code.get(name, "??Unknown??") + if verbose: print(" builtin: %s" % name) + nobj.name = name + nobj.raw_formula = data[pos:] + nobj.basic_formula_len = fmla_len + nobj.evaluated = 0 + if verbose: + nobj.dump(self.logfile, + 
header="--- handle_name: name[%d] ---" % name_index, + footer="-------------------") + + def names_epilogue(self): + verbose = self.verbosity >= 2 + f = self.logfile + if verbose: + print("+++++ names_epilogue +++++", file=f) + print("_all_sheets_map", self._all_sheets_map, file=f) + print("_extnsht_name_from_num", self._extnsht_name_from_num, file=f) + print("_sheet_num_from_name", self._sheet_num_from_name, file=f) + num_names = len(self.name_obj_list) + for namex in range(num_names): + nobj = self.name_obj_list[namex] + # Convert from excel_sheet_index to scope. + # This is done here because in BIFF7 and earlier, the + # BOUNDSHEET records (from which _all_sheets_map is derived) + # come after the NAME records. + if self.biff_version >= 80: + sheet_index = nobj.excel_sheet_index + if sheet_index == 0: + intl_sheet_index = -1 # global + elif 1 <= sheet_index <= len(self._all_sheets_map): + intl_sheet_index = self._all_sheets_map[sheet_index-1] + if intl_sheet_index == -1: # maps to a macro or VBA sheet + intl_sheet_index = -2 # valid sheet reference but not useful + else: + # huh? + intl_sheet_index = -3 # invalid + elif 50 <= self.biff_version <= 70: + sheet_index = nobj.extn_sheet_num + if sheet_index == 0: + intl_sheet_index = -1 # global + else: + sheet_name = self._extnsht_name_from_num[sheet_index] + intl_sheet_index = self._sheet_num_from_name.get(sheet_name, -2) + nobj.scope = intl_sheet_index + + for namex in range(num_names): + nobj = self.name_obj_list[namex] + # Parse the formula ... 
+ if nobj.macro or nobj.binary: continue + if nobj.evaluated: continue + evaluate_name_formula(self, nobj, namex, verbose=verbose) + + if self.verbosity >= 2: + print("---------- name object dump ----------", file=f) + for namex in range(num_names): + nobj = self.name_obj_list[namex] + nobj.dump(f, header="--- name[%d] ---" % namex) + print("--------------------------------------", file=f) + # + # Build some dicts for access to the name objects + # + name_and_scope_map = {} + name_map = {} + for namex in range(num_names): + nobj = self.name_obj_list[namex] + name_lcase = nobj.name.lower() + key = (name_lcase, nobj.scope) + if key in name_and_scope_map: + msg = 'Duplicate entry %r in name_and_scope_map' % (key, ) + if 0: + raise XLRDError(msg) + else: + if self.verbosity: + print(msg, file=f) + name_and_scope_map[key] = nobj + if name_lcase in name_map: + name_map[name_lcase].append((nobj.scope, nobj)) + else: + name_map[name_lcase] = [(nobj.scope, nobj)] + for key in list(name_map.keys()): + alist = name_map[key] + alist.sort() + name_map[key] = [x[1] for x in alist] + self.name_and_scope_map = name_and_scope_map + self.name_map = name_map + + def handle_obj(self, data): + # Not doing much handling at all. + # Worrying about embedded (BOF ... EOF) substreams is done elsewhere. 
+ obj_type, obj_id = unpack('= 2 + num_sheets = unpack("= 2: + fprintf(self.logfile, "SST: unique strings: %d\n", uniquestrings) + while True: + code, nb, data = self.get_record_parts_conditional(XL_CONTINUE) + if code is None: + break + nbt += nb + if DEBUG: + fprintf(self.logfile, "CONTINUE: adding %d bytes to SST -> %d\n", nb, nbt) + strlist.append(data) + self._sharedstrings = unpack_SST_table(strlist, uniquestrings) + if DEBUG: + t1 = time.time() + print("SST processing took %.2f seconds" % (t1 - t0, ), file=self.logfile) + + def handle_writeaccess(self, data): + if self.biff_version < 80: + if not self.encoding: + self.raw_user_name = True + self.user_name = data + return + strg = unpack_string(data, 0, self.encoding, lenlen=1) + else: + strg = unpack_unicode(data, 0, lenlen=2) + if DEBUG: + print("WRITEACCESS: %d bytes; raw=%d %r" % (len(data), self.raw_user_name, strg), file=self.logfile) + strg = strg.rstrip() + self.user_name = strg + + def parse_globals(self): + # no need to position, just start reading (after the BOF) + formatting.initialise_book(self) + while True: + rc, length, data = self.get_record_parts() + if DEBUG: + print("parse_globals: record code is 0x%04x" % rc) + if rc == XL_SST: + self.handle_sst(data) + elif rc == XL_FONT or rc == XL_FONT_B3B4: + self.handle_font(data) + elif rc == XL_FORMAT: # XL_FORMAT2 is BIFF <= 3.0, can't appear in globals + self.handle_format(data) + elif rc == XL_XF: + self.handle_xf(data) + elif rc == XL_BOUNDSHEET: + self.handle_boundsheet(data) + elif rc == XL_DATEMODE: + self.handle_datemode(data) + elif rc == XL_CODEPAGE: + self.handle_codepage(data) + elif rc == XL_COUNTRY: + self.handle_country(data) + elif rc == XL_EXTERNNAME: + self.handle_externname(data) + elif rc == XL_EXTERNSHEET: + self.handle_externsheet(data) + elif rc == XL_FILEPASS: + self.handle_filepass(data) + elif rc == XL_WRITEACCESS: + self.handle_writeaccess(data) + elif rc == XL_SHEETSOFFSET: + self.handle_sheetsoffset(data) + elif rc == 
XL_SHEETHDR: + self.handle_sheethdr(data) + elif rc == XL_SUPBOOK: + self.handle_supbook(data) + elif rc == XL_NAME: + self.handle_name(data) + elif rc == XL_PALETTE: + self.handle_palette(data) + elif rc == XL_STYLE: + self.handle_style(data) + elif rc & 0xff == 9: + print("*** Unexpected BOF at posn %d: 0x%04x len=%d data=%r" \ + % (self._position - length - 4, rc, length, data), file=self.logfile) + elif rc == XL_EOF: + self.xf_epilogue() + self.names_epilogue() + self.palette_epilogue() + if not self.encoding: + self.derive_encoding() + if self.biff_version == 45: + if DEBUG: + print("global EOF: position", self._position) + return + + def read(self, pos, length): + data = self.mem[pos:pos+length] + self._position = pos + len(data) + return data + + def getbof(self, rqd_stream): + def bof_error(msg): + raise XLRDError('Unsupported format, or corrupt file: ' + msg) + + if DEBUG: + print("reqd: 0x%04x" % rqd_stream, file=self.logfile) + + savpos = self._position + opcode = self.get2bytes() + + if opcode == MY_EOF: + bof_error('Expected BOF record; met end of file') + + if opcode not in bofcodes: + bof_error('Expected BOF record; found %r' % self.mem[savpos:savpos+8]) + + length = self.get2bytes() + if length == MY_EOF: + bof_error('Incomplete BOF record[1]; met end of file') + + if length < boflen[opcode] or length > 20: + bof_error('Invalid length (%d) for BOF record type 0x%04x' % (length, opcode)) + + data = self.read(self._position, length); + if DEBUG: print("\ngetbof(): data=%r" % data, file=self.logfile) + if len(data) < length: + bof_error('Incomplete BOF record[2]; met end of file') + version1 = opcode >> 8 + version2, streamtype = unpack('= 2: + print("BOF: op=0x%04x vers=0x%04x stream=0x%04x buildid=%d buildyr=%d -> BIFF%d" \ + % (opcode, version2, streamtype, build, year, version), file=self.logfile) + got_globals = streamtype == XL_WORKBOOK_GLOBALS or ( + version == 45 and streamtype == XL_WORKBOOK_GLOBALS_4W) + if (rqd_stream == XL_WORKBOOK_GLOBALS 
and got_globals) or streamtype == rqd_stream: + return version + if version < 50 and streamtype == XL_WORKSHEET: + return version + if version >= 50 and streamtype == 0x0100: + bof_error("Workspace file -- no spreadsheet data") + bof_error( + 'BOF not workbook/worksheet: op=0x%04x vers=0x%04x strm=0x%04x build=%d year=%d -> BIFF%d' \ + % (opcode, version2, streamtype, build, year, version)) + +# === helper functions + +def expand_cell_address(inrow, incol): + # Ref : OOo docs, "4.3.4 Cell Addresses in BIFF8" + outrow = inrow + if incol & 0x8000: + if outrow >= 32768: + outrow -= 65536 + relrow = 1 + else: + relrow = 0 + outcol = incol & 0xFF + if incol & 0x4000: + if outcol >= 128: + outcol -= 256 + relcol = 1 + else: + relcol = 0 + return outrow, outcol, relrow, relcol + +def colname(colx, _A2Z="ABCDEFGHIJKLMNOPQRSTUVWXYZ"): + assert colx >= 0 + name = '' + while True: + quot, rem = divmod(colx, 26) + name = _A2Z[rem] + name + if not quot: + return name + colx = quot - 1 + +def display_cell_address(rowx, colx, relrow, relcol): + if relrow: + rowpart = "(*%s%d)" % ("+-"[rowx < 0], abs(rowx)) + else: + rowpart = "$%d" % (rowx+1,) + if relcol: + colpart = "(*%s%d)" % ("+-"[colx < 0], abs(colx)) + else: + colpart = "$" + colname(colx) + return colpart + rowpart + +def unpack_SST_table(datatab, nstrings): + """ Return list of strings """ + datainx = 0 + ndatas = len(datatab) + data = datatab[0] + datalen = len(data) + pos = 8 + strings = [] + strappend = strings.append + local_unpack = unpack + local_min = min + #(to_py3): local_ord = ord + latin_1 = "latin_1" + for _unused_i in range(nstrings): + nchars = local_unpack('> 1, charsneed) + rawstrg = data[pos:pos+2*charsavail] + try: + #(to_py3) accstrg += unicode(rawstrg, "utf_16_le") + accstrg += str(rawstrg, "utf_16_le") + except: + # print "SST U16: nchars=%d pos=%d rawstrg=%r" % (nchars, pos, rawstrg) + # Probable cause: dodgy data e.g. unfinished surrogate pair. + # E.g. 
file unicode2.xls in pyExcelerator's examples has cells containing + # unichr(i) for i in range(0x100000) + # so this will include 0xD800 etc + raise + pos += (2 * charsavail) + else: + # Note: this is COMPRESSED (not ASCII!) encoding!!! + charsavail = local_min(datalen - pos, charsneed) + rawstrg = data[pos:pos+charsavail] + #(to_py3) accstrg += unicode(rawstrg, latin_1) + accstrg += str(rawstrg, latin_1) + pos += charsavail + charsgot += charsavail + if charsgot == nchars: + break + datainx += 1 + data = datatab[datainx] + datalen = len(data) + #(to_py3): options = local_ord(data[0]) + options = data[0] + pos = 1 + pos += rtsz # size of richtext & phonetic stuff to skip + # also allow for the rich text etc being split ... + if pos >= datalen: + # adjust to correct position in next record + pos = pos - datalen + datainx += 1 + if datainx < ndatas: + data = datatab[datainx] + datalen = len(data) + else: + assert _unused_i == nstrings - 1 + strappend(accstrg) + return strings diff --git a/tablib/packages/xlrd3/biffh.py b/tablib/packages/xlrd3/biffh.py new file mode 100644 index 0000000..d99e3a3 --- /dev/null +++ b/tablib/packages/xlrd3/biffh.py @@ -0,0 +1,620 @@ +# Support module for the xlrd3 package. +# +# Portions copyright (c) 2005-2008 Stephen John Machin, Lingfo Pty Ltd +# This module is part of the xlrd package, which is released under a +# BSD-style licence. +# +# 2010-12-08 mozman refactoring for python 3 +# 2008-02-10 SJM BIFF2 BLANK record +# 2008-02-08 SJM Preparation for Excel 2.0 support +# 2008-02-02 SJM Added suffixes (_B2, _B2_ONLY, etc) on record names for +# biff_dump & biff_count +# 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files. +# 2007-09-08 SJM Avoid crash when zero-length Unicode string missing options byte. +# 2007-04-22 SJM Remove experimental "trimming" facility. 
+ +import sys +from struct import unpack + +encoding_from_codepage = { + 1200 : 'utf_16_le', + 10000: 'mac_roman', + 10006: 'mac_greek', # guess + 10007: 'mac_cyrillic', # guess + 10029: 'mac_latin2', # guess + 10079: 'mac_iceland', # guess + 10081: 'mac_turkish', # guess + 32768: 'mac_roman', + 32769: 'cp1252', + } + +# some more guessing, for Indic scripts +# codepage 57000 range: +# 2 Devanagari [0] +# 3 Bengali [1] +# 4 Tamil [5] +# 5 Telegu [6] +# 6 Assamese [1] c.f. Bengali +# 7 Oriya [4] +# 8 Kannada [7] +# 9 Malayalam [8] +# 10 Gujarati [3] +# 11 Gurmukhi [2] + +FUN = 0 # unknown +FDT = 1 # date +FNU = 2 # number +FGE = 3 # general +FTX = 4 # text + +DATEFORMAT = FDT +NUMBERFORMAT = FNU + +XL_CELL_EMPTY = 0 +XL_CELL_TEXT = 1 +XL_CELL_NUMBER = 2 +XL_CELL_DATE = 3 +XL_CELL_BOOLEAN = 4 +XL_CELL_ERROR = 5 +XL_CELL_BLANK = 6 # for use in debugging, gathering stats, etc + +biff_text_from_num = { + 0: "(not BIFF)", + 20: "2.0", + 21: "2.1", + 30: "3", + 40: "4S", + 45: "4W", + 50: "5", + 70: "7", + 80: "8", + 85: "8X", +} + +# This dictionary can be used to produce a text version of the internal codes +# that Excel uses for error cells. 
Here are its contents: +error_text_from_code = { + 0x00: '#NULL!', # Intersection of two cell ranges is empty + 0x07: '#DIV/0!', # Division by zero + 0x0F: '#VALUE!', # Wrong type of operand + 0x17: '#REF!', # Illegal or deleted cell reference + 0x1D: '#NAME?', # Wrong function or range name + 0x24: '#NUM!', # Value range overflow + 0x2A: '#N/A!', # Argument or function not available +} + +BIFF_FIRST_UNICODE = 80 + +XL_WORKBOOK_GLOBALS = WBKBLOBAL = 0x5 +XL_WORKBOOK_GLOBALS_4W = 0x100 +XL_WORKSHEET = WRKSHEET = 0x10 + +XL_BOUNDSHEET_WORKSHEET = 0x00 +XL_BOUNDSHEET_CHART = 0x02 +XL_BOUNDSHEET_VB_MODULE = 0x06 + +# XL_RK2 = 0x7e +XL_ARRAY = 0x0221 +XL_ARRAY2 = 0x0021 +XL_BLANK = 0x0201 +XL_BLANK_B2 = 0x01 +XL_BOF = 0x809 +XL_BOOLERR = 0x205 +XL_BOOLERR_B2 = 0x5 +XL_BOUNDSHEET = 0x85 +XL_BUILTINFMTCOUNT = 0x56 +XL_CF = 0x01B1 +XL_CODEPAGE = 0x42 +XL_COLINFO = 0x7D +XL_COLUMNDEFAULT = 0x20 # BIFF2 only +XL_COLWIDTH = 0x24 # BIFF2 only +XL_CONDFMT = 0x01B0 +XL_CONTINUE = 0x3c +XL_COUNTRY = 0x8C +XL_DATEMODE = 0x22 +XL_DEFAULTROWHEIGHT = 0x0225 +XL_DEFCOLWIDTH = 0x55 +XL_DIMENSION = 0x200 +XL_DIMENSION2 = 0x0 +XL_EFONT = 0x45 +XL_EOF = 0x0a +XL_EXTERNNAME = 0x23 +XL_EXTERNSHEET = 0x17 +XL_EXTSST = 0xff +XL_FEAT11 = 0x872 +XL_FILEPASS = 0x2f +XL_FONT = 0x31 +XL_FONT_B3B4 = 0x231 +XL_FORMAT = 0x41e +XL_FORMAT2 = 0x1E # BIFF2, BIFF3 +XL_FORMULA = 0x6 +XL_FORMULA3 = 0x206 +XL_FORMULA4 = 0x406 +XL_GCW = 0xab +XL_INDEX = 0x20b +XL_INTEGER = 0x2 # BIFF2 only +XL_IXFE = 0x44 # BIFF2 only +XL_LABEL = 0x204 +XL_LABEL_B2 = 0x04 +XL_LABELRANGES = 0x15f +XL_LABELSST = 0xfd +XL_MERGEDCELLS = 0xE5 +XL_MSO_DRAWING = 0x00EC +XL_MSO_DRAWING_GROUP = 0x00EB +XL_MSO_DRAWING_SELECTION = 0x00ED +XL_MULRK = 0xbd +XL_MULBLANK = 0xbe +XL_NAME = 0x18 +XL_NOTE = 0x1c +XL_NUMBER = 0x203 +XL_NUMBER_B2 = 0x3 +XL_OBJ = 0x5D +XL_PALETTE = 0x92 +XL_RK = 0x27e +XL_ROW = 0x208 +XL_ROW_B2 = 0x08 +XL_RSTRING = 0xd6 +XL_SHEETHDR = 0x8F # BIFF4W only +XL_SHEETSOFFSET = 0x8E # BIFF4W only +XL_SHRFMLA = 0x04bc 
+XL_SST = 0xfc +XL_STANDARDWIDTH = 0x99 +XL_STRING = 0x207 +XL_STRING_B2 = 0x7 +XL_STYLE = 0x293 +XL_SUPBOOK = 0x1AE +XL_TABLEOP = 0x236 +XL_TABLEOP2 = 0x37 +XL_TABLEOP_B2 = 0x36 +XL_TXO = 0x1b6 +XL_UNCALCED = 0x5e +XL_UNKNOWN = 0xffff +XL_WINDOW2 = 0x023E +XL_WRITEACCESS = 0x5C +XL_XF = 0xe0 +XL_XF2 = 0x0043 # BIFF2 version of XF record +XL_XF3 = 0x0243 # BIFF3 version of XF record +XL_XF4 = 0x0443 # BIFF4 version of XF record + +boflen = { + 0x0809: 8, + 0x0409: 6, + 0x0209: 6, + 0x0009: 4, +} + +bofcodes = (0x0809, 0x0409, 0x0209, 0x0009) + +XL_FORMULA_OPCODES = (0x0006, 0x0406, 0x0206) + +_cell_opcode_list = ( + XL_BOOLERR, + XL_FORMULA, + XL_FORMULA3, + XL_FORMULA4, + XL_LABEL, + XL_LABELSST, + XL_MULRK, + XL_NUMBER, + XL_RK, + XL_RSTRING, +) + +biff_rec_name_dict = { + 0x0000: 'DIMENSIONS_B2', + 0x0001: 'BLANK_B2', + 0x0002: 'INTEGER_B2_ONLY', + 0x0003: 'NUMBER_B2', + 0x0004: 'LABEL_B2', + 0x0005: 'BOOLERR_B2', + 0x0006: 'FORMULA', + 0x0007: 'STRING_B2', + 0x0008: 'ROW_B2', + 0x0009: 'BOF_B2', + 0x000A: 'EOF', + 0x000B: 'INDEX_B2_ONLY', + 0x000C: 'CALCCOUNT', + 0x000D: 'CALCMODE', + 0x000E: 'PRECISION', + 0x000F: 'REFMODE', + 0x0010: 'DELTA', + 0x0011: 'ITERATION', + 0x0012: 'PROTECT', + 0x0013: 'PASSWORD', + 0x0014: 'HEADER', + 0x0015: 'FOOTER', + 0x0016: 'EXTERNCOUNT', + 0x0017: 'EXTERNSHEET', + 0x0018: 'NAME_B2,5+', + 0x0019: 'WINDOWPROTECT', + 0x001A: 'VERTICALPAGEBREAKS', + 0x001B: 'HORIZONTALPAGEBREAKS', + 0x001C: 'NOTE', + 0x001D: 'SELECTION', + 0x001E: 'FORMAT_B2-3', + 0x001F: 'BUILTINFMTCOUNT_B2', + 0x0020: 'COLUMNDEFAULT_B2_ONLY', + 0x0021: 'ARRAY_B2_ONLY', + 0x0022: 'DATEMODE', + 0x0023: 'EXTERNNAME', + 0x0024: 'COLWIDTH_B2_ONLY', + 0x0025: 'DEFAULTROWHEIGHT_B2_ONLY', + 0x0026: 'LEFTMARGIN', + 0x0027: 'RIGHTMARGIN', + 0x0028: 'TOPMARGIN', + 0x0029: 'BOTTOMMARGIN', + 0x002A: 'PRINTHEADERS', + 0x002B: 'PRINTGRIDLINES', + 0x002F: 'FILEPASS', + 0x0031: 'FONT', + 0x0032: 'FONT2_B2_ONLY', + 0x0036: 'TABLEOP_B2', + 0x0037: 'TABLEOP2_B2', + 0x003C: 
'CONTINUE', + 0x003D: 'WINDOW1', + 0x003E: 'WINDOW2_B2', + 0x0040: 'BACKUP', + 0x0041: 'PANE', + 0x0042: 'CODEPAGE', + 0x0043: 'XF_B2', + 0x0044: 'IXFE_B2_ONLY', + 0x0045: 'EFONT_B2_ONLY', + 0x004D: 'PLS', + 0x0051: 'DCONREF', + 0x0055: 'DEFCOLWIDTH', + 0x0056: 'BUILTINFMTCOUNT_B3-4', + 0x0059: 'XCT', + 0x005A: 'CRN', + 0x005B: 'FILESHARING', + 0x005C: 'WRITEACCESS', + 0x005D: 'OBJECT', + 0x005E: 'UNCALCED', + 0x005F: 'SAVERECALC', + 0x0063: 'OBJECTPROTECT', + 0x007D: 'COLINFO', + 0x007E: 'RK2_mythical_?', + 0x0080: 'GUTS', + 0x0081: 'WSBOOL', + 0x0082: 'GRIDSET', + 0x0083: 'HCENTER', + 0x0084: 'VCENTER', + 0x0085: 'BOUNDSHEET', + 0x0086: 'WRITEPROT', + 0x008C: 'COUNTRY', + 0x008D: 'HIDEOBJ', + 0x008E: 'SHEETSOFFSET', + 0x008F: 'SHEETHDR', + 0x0090: 'SORT', + 0x0092: 'PALETTE', + 0x0099: 'STANDARDWIDTH', + 0x009B: 'FILTERMODE', + 0x009C: 'FNGROUPCOUNT', + 0x009D: 'AUTOFILTERINFO', + 0x009E: 'AUTOFILTER', + 0x00A0: 'SCL', + 0x00A1: 'SETUP', + 0x00AB: 'GCW', + 0x00BD: 'MULRK', + 0x00BE: 'MULBLANK', + 0x00C1: 'MMS', + 0x00D6: 'RSTRING', + 0x00D7: 'DBCELL', + 0x00DA: 'BOOKBOOL', + 0x00DD: 'SCENPROTECT', + 0x00E0: 'XF', + 0x00E1: 'INTERFACEHDR', + 0x00E2: 'INTERFACEEND', + 0x00E5: 'MERGEDCELLS', + 0x00E9: 'BITMAP', + 0x00EB: 'MSO_DRAWING_GROUP', + 0x00EC: 'MSO_DRAWING', + 0x00ED: 'MSO_DRAWING_SELECTION', + 0x00EF: 'PHONETIC', + 0x00FC: 'SST', + 0x00FD: 'LABELSST', + 0x00FF: 'EXTSST', + 0x013D: 'TABID', + 0x015F: 'LABELRANGES', + 0x0160: 'USESELFS', + 0x0161: 'DSF', + 0x01AE: 'SUPBOOK', + 0x01AF: 'PROTECTIONREV4', + 0x01B0: 'CONDFMT', + 0x01B1: 'CF', + 0x01B2: 'DVAL', + 0x01B6: 'TXO', + 0x01B7: 'REFRESHALL', + 0x01B8: 'HLINK', + 0x01BC: 'PASSWORDREV4', + 0x01BE: 'DV', + 0x01C0: 'XL9FILE', + 0x01C1: 'RECALCID', + 0x0200: 'DIMENSIONS', + 0x0201: 'BLANK', + 0x0203: 'NUMBER', + 0x0204: 'LABEL', + 0x0205: 'BOOLERR', + 0x0206: 'FORMULA_B3', + 0x0207: 'STRING', + 0x0208: 'ROW', + 0x0209: 'BOF', + 0x020B: 'INDEX_B3+', + 0x0218: 'NAME', + 0x0221: 'ARRAY', + 0x0223: 
'EXTERNNAME_B3-4', + 0x0225: 'DEFAULTROWHEIGHT', + 0x0231: 'FONT_B3B4', + 0x0236: 'TABLEOP', + 0x023E: 'WINDOW2', + 0x0243: 'XF_B3', + 0x027E: 'RK', + 0x0293: 'STYLE', + 0x0406: 'FORMULA_B4', + 0x0409: 'BOF', + 0x041E: 'FORMAT', + 0x0443: 'XF_B4', + 0x04BC: 'SHRFMLA', + 0x0800: 'QUICKTIP', + 0x0809: 'BOF', + 0x0862: 'SHEETLAYOUT', + 0x0867: 'SHEETPROTECTION', + 0x0868: 'RANGEPROTECTION', +} + +class XLRDError(Exception): + pass + +class BaseObject: + """ + Parent of almost all other classes in the package. Defines a common + 'dump' method for debugging. + """ + _repr_these = [] + + def dump(self, f=None, header=None, footer=None, indent=0): + """ + :param f: open file object, to which the dump is written + :param header: text to write before the dump + :param footer: text to write after the dump + :param indent: number of leading spaces (for recursive calls) + """ + if f is None: + f = sys.stderr + pad = " " * indent + + if header is not None: + print(header, file=f) + + for attr, value in sorted(self.__dict__.items()): + if getattr(value, 'dump', None) and attr != 'book': + value.dump(f, + header="%s%s (%s object):" % (pad, attr, value.__class__.__name__), + indent=indent+4) + elif attr not in self._repr_these and \ + (isinstance(value, list) or + isinstance(value, dict)): + print("%s%s: %s, len = %d" % (pad, attr, type(value), len(value)), file=f) + else: + print("%s%s: %r" % (pad, attr, value), file=f) + if footer is not None: + print(footer, file=f) + +def fprintf(f, fmt, *vargs): + print(fmt.rstrip('\n') % vargs, file=f) + +def upkbits(tgt_obj, src, manifest, local_setattr=setattr): + for n, mask, attr in manifest: + local_setattr(tgt_obj, attr, (src & mask) >> n) + +def upkbitsL(tgt_obj, src, manifest, local_setattr=setattr, local_int=int): + for n, mask, attr in manifest: + local_setattr(tgt_obj, attr, local_int((src & mask) >> n)) + +def unpack_string(data, pos, encoding, lenlen=1): + nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0] + pos += 
lenlen + return str(data[pos:pos+nchars], encoding) + +def unpack_string_update_pos(data, pos, encoding, lenlen=1, known_len=None): + if known_len is not None: + # On a NAME record, the length byte is detached from the front of the string. + nchars = known_len + else: + nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0] + pos += lenlen + + newpos = pos + nchars + return (str(data[pos:newpos], encoding), newpos) + +def unpack_unicode(data, pos, lenlen=2): + """ Return unicode_strg """ + nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0] + if not nchars: + # Ambiguous whether 0-length string should have an "options" byte. + # Avoid crash if missing. + return "" + pos += lenlen + options = data[pos] + pos += 1 + + if options & 0x08: # richtext + pos += 2 + + if options & 0x04: # phonetic + pos += 4 + + if options & 0x01: + # Uncompressed UTF-16-LE + rawstrg = data[pos:pos+2*nchars] + strg = str(rawstrg, 'utf_16_le') + else: + # Note: this is COMPRESSED (not ASCII!) encoding!!! + # Merely returning the raw bytes would work OK 99.99% of the time + # if the local codepage was cp1252 -- however this would rapidly go pear-shaped + # for other codepages so we grit our Anglocentric teeth and return Unicode :-) + strg = str(data[pos:pos+nchars], "latin_1") + return strg + +def unpack_unicode_update_pos(data, pos, lenlen=2, known_len=None): + """ Return (unicode_strg, updated value of pos) """ + if known_len is not None: + # On a NAME record, the length byte is detached from the front of the string. 
+ nchars = known_len + else: + nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0] + pos += lenlen + + if not nchars and not data[pos:]: + # Zero-length string with no options byte + return ("", pos) + + options = data[pos] + pos += 1 + phonetic = options & 0x04 + richtext = options & 0x08 + + if richtext: + rt = unpack(' endpos=%d pos=%d endsub=%d substrg=%r\n', + ofs, dlen, base, endpos, pos, endsub, substrg) + break + hexd = ''.join(["%02x " % c for c in substrg]) + chard = '' + for c in substrg: + if c == ord('\0'): + c = '~' + elif not (' ' <= chr(c) <= '~'): + c = '?' + if isinstance(c, int): + c = chr(c) + chard += c + if numbered: + num_prefix = "%5d: " % (base+pos-ofs) + fprintf(fout, "%s %-48s %s\n", num_prefix, hexd, chard) + pos = endsub + +def biff_dump(mem, stream_offset, stream_len, base=0, fout=sys.stdout, + unnumbered=False): + pos = stream_offset + stream_end = stream_offset + stream_len + adj = base - stream_offset + dummies = 0 + numbered = not unnumbered + num_prefix = '' + while stream_end - pos >= 4: + rc, length = unpack('') + if numbered: + num_prefix = "%5d: " % (adj + pos) + fprintf(fout, "%s%04x %s len = %04x (%d)\n", + num_prefix, rc, recname, length, length) + pos += 4 + hex_char_dump(mem, pos, length, adj+pos, fout, unnumbered) + pos += length + if dummies: + if numbered: + num_prefix = "%5d: " % (adj + savpos) + fprintf(fout, "%s---- %d zero bytes skipped ----\n", num_prefix, dummies) + + if pos < stream_end: + if numbered: + num_prefix = "%5d: " % (adj + pos) + fprintf(fout, "%s---- Misc bytes at end ----\n", num_prefix) + hex_char_dump(mem, pos, stream_end-pos, adj + pos, fout, unnumbered) + elif pos > stream_end: + fprintf(fout, "Last dumped record has length (%d) that is too large\n", length) + +def biff_count_records(mem, stream_offset, stream_len, fout=sys.stdout): + pos = stream_offset + stream_end = stream_offset + stream_len + tally = {} + while stream_end - pos >= 4: + rc, length = unpack(' can't open files on 
bigendian platforms. + + +import sys +from struct import unpack + +# Magic cookie that should appear in the first 8 bytes of the file. +SIGNATURE = b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" + +EOCSID = -2 +FREESID = -1 +SATSID = -3 +MSATSID = -4 + +def int_floor_div(x, y): + return divmod(x, y)[0] + +class CompDocError(Exception): + pass + +class DirNode(object): + + def __init__(self, DID, dent, DEBUG=0): + # dent is the 128-byte directory entry + self.DID = DID + # (cbufsize, self.etype, self.colour, self.left_DID, self.right_DID, + # self.root_DID, + # self.first_SID, + # self.tot_size) = \ + # unpack(' 20: # allows for 2**20 bytes i.e. 1MB + print("WARNING: sector size (2**%d) is preposterous; assuming 512 and continuing ..." \ + % ssz, file=logfile) + ssz = 9 + if sssz > ssz: + print("WARNING: short stream sector size (2**%d) is preposterous; assuming 64 and continuing ..." \ + % sssz, file=logfile) + sssz = 6 + self.sec_size = sec_size = 1 << ssz + self.short_sec_size = 1 << sssz + ( + SAT_tot_secs, self.dir_first_sec_sid, _unused, self.min_size_std_stream, + SSAT_first_sec_sid, SSAT_tot_secs, + MSAT_first_sec_sid, MSAT_tot_secs, + # ) = unpack('= 0: + if sid >= mem_data_secs: + raise CompDocError( + "MSAT extension: accessing sector %d but only %d in file" % (sid, mem_data_secs) + ) + offset = 512 + sec_size * sid + news = list(unpack(fmt, mem[offset:offset+sec_size])) + sid = news.pop() + MSAT.extend(news) + if DEBUG: + print("MSAT: len =", len(MSAT), file=logfile) + print(MSAT, file=logfile) + # + # === build the SAT === + # + self.SAT = [] + for msid in MSAT: + if msid == FREESID: continue + if msid >= mem_data_secs: + if not trunc_warned: + print("WARNING *** File is truncated, or OLE2 MSAT is corrupt!!", file=logfile) + print("INFO: Trying to access sector %d but only %d available" \ + % (msid, mem_data_secs), file=logfile) + trunc_warned = 1 + continue + offset = 512 + sec_size * msid + news = list(unpack(fmt, mem[offset:offset+sec_size])) + 
self.SAT.extend(news) + if DEBUG: + print("SAT: len =", len(self.SAT), file=logfile) + print(self.SAT, file=logfile) + # print >> logfile, "SAT ", + # for i, s in enumerate(self.SAT): + # print >> logfile, "entry: %4d offset: %6d, next entry: %4d" % (i, 512 + sec_size * i, s) + # print >> logfile, "%d:%d " % (i, s), + print() + + # === build the directory === + # + dbytes = self._get_stream( + self.mem, 512, self.SAT, self.sec_size, self.dir_first_sec_sid, + name="directory") + dirlist = [] + did = -1 + for pos in range(0, len(dbytes), 128): + did += 1 + dirlist.append(DirNode(did, dbytes[pos:pos+128], 0)) + self.dirlist = dirlist + _build_family_tree(dirlist, 0, dirlist[0].root_DID) # and stand well back ... + if DEBUG: + for d in dirlist: + d.dump(DEBUG) + # + # === get the SSCS === + # + sscs_dir = self.dirlist[0] + assert sscs_dir.etype == 5 # root entry + if sscs_dir.first_SID < 0 and sscs_dir.tot_size == 0: + # Problem reported by Frank Hoffsuemmer: some software was + # writing -1 instead of -2 (EOCSID) for the first_SID + # when the SCCS was empty. Not having EOCSID caused assertion + # failure in _get_stream. + # Solution: avoid calling _get_stream in any case when the + # SCSS appears to be empty. 
+ self.SSCS = "" + else: + self.SSCS = self._get_stream( + self.mem, 512, self.SAT, sec_size, sscs_dir.first_SID, + sscs_dir.tot_size, name="SSCS") + # if DEBUG: print >> logfile, "SSCS", repr(self.SSCS) + # + # === build the SSAT === + # + self.SSAT = [] + if SSAT_tot_secs > 0 and sscs_dir.tot_size == 0: + print("WARNING *** OLE2 inconsistency: SSCS size is 0 but SSAT size is non-zero", file=logfile) + if sscs_dir.tot_size > 0: + sid = SSAT_first_sec_sid + nsecs = SSAT_tot_secs + while sid >= 0 and nsecs > 0: + nsecs -= 1 + start_pos = 512 + sid * sec_size + news = list(unpack(fmt, mem[start_pos:start_pos+sec_size])) + self.SSAT.extend(news) + sid = self.SAT[sid] + # assert SSAT_tot_secs == 0 or sid == EOCSID + if DEBUG: print("SSAT last sid %d; remaining sectors %d" % (sid, nsecs), file=logfile) + assert nsecs == 0 and sid == EOCSID + if DEBUG: print("SSAT", self.SSAT, file=logfile) + + def _get_stream(self, mem, base, sat, sec_size, start_sid, size=None, name=''): + # print >> self.logfile, "_get_stream", base, sec_size, start_sid, size + sectors = [] + s = start_sid + if size is None: + # nothing to check against + while s >= 0: + start_pos = base + s * sec_size + sectors.append(mem[start_pos:start_pos+sec_size]) + try: + s = sat[s] + except IndexError: + raise CompDocError( + "OLE2 stream %r: sector allocation table invalid entry (%d)" % + (name, s) + ) + assert s == EOCSID + else: + todo = size + while s >= 0: + start_pos = base + s * sec_size + grab = sec_size + if grab > todo: + grab = todo + todo -= grab + sectors.append(mem[start_pos:start_pos+grab]) + try: + s = sat[s] + except IndexError: + raise CompDocError( + "OLE2 stream %r: sector allocation table invalid entry (%d)" % + (name, s) + ) + assert s == EOCSID + if todo != 0: + print("WARNING *** OLE2 stream %r: expected size %d, actual size %d" \ + % (name, size, size - todo), file=self.logfile) + return b''.join(sectors) + + def _dir_search(self, path, storage_DID=0): + # Return matching DirNode 
instance, or None + head = path[0] + tail = path[1:] + dl = self.dirlist + for child in dl[storage_DID].children: + if dl[child].name.lower() == head.lower(): + et = dl[child].etype + if et == 2: + return dl[child] + if et == 1: + if not tail: + raise CompDocError("Requested component is a 'storage'") + return self._dir_search(tail, child) + dl[child].dump(1) + raise CompDocError("Requested stream is not a 'user stream'") + return None + + ## + # Interrogate the compound document's directory; return the stream as a string if found, otherwise + # return None. + # @param qname Name of the desired stream e.g. u'Workbook'. Should be in Unicode or convertible thereto. + + def get_named_stream(self, qname): + d = self._dir_search(qname.split("/")) + if d is None: + return None + if d.tot_size >= self.min_size_std_stream: + return self._get_stream( + self.mem, 512, self.SAT, self.sec_size, d.first_SID, + d.tot_size, name=qname) + else: + return self._get_stream( + self.SSCS, 0, self.SSAT, self.short_sec_size, d.first_SID, + d.tot_size, name=qname + " (from SSCS)") + + # Interrogate the compound document's directory. + # If the named stream is not found, (None, 0, 0) will be returned. + # If the named stream is found and is contiguous within the original byte sequence ("mem") + # used when the document was opened, + # then (mem, offset_to_start_of_stream, length_of_stream) is returned. + # Otherwise a new string is built from the fragments and (new_string, 0, length_of_stream) is returned. + # @param qname Name of the desired stream e.g. u'Workbook'. Should be in Unicode or convertible thereto. 
+ + def locate_named_stream(self, qname): + d = self._dir_search(qname.split("/")) + if d is None: + return (None, 0, 0) + if d.tot_size >= self.min_size_std_stream: + return self._locate_stream(self.mem, 512, self.SAT, self.sec_size, d.first_SID, d.tot_size) + else: + return ( + self._get_stream( + self.SSCS, 0, self.SSAT, self.short_sec_size, d.first_SID, + d.tot_size, qname + " (from SSCS)"), + 0, + d.tot_size + ) + return (None, 0, 0) # not found + + def _locate_stream(self, mem, base, sat, sec_size, start_sid, size): + # print >> self.logfile, "_locate_stream", base, sec_size, start_sid, size + s = start_sid + if s < 0: + raise CompDocError("_locate_stream: start_sid (%d) is -ve" % start_sid) + p = -99 # dummy previous SID + start_pos = -9999 + end_pos = -8888 + slices = [] + while s >= 0: + if s == p+1: + # contiguous sectors + end_pos += sec_size + else: + # start new slice + if p >= 0: + # not first time + slices.append((start_pos, end_pos)) + start_pos = base + s * sec_size + end_pos = start_pos + sec_size + p = s + s = sat[s] + assert s == EOCSID + # print >> self.logfile, len(slices) + 1, "slices" + if not slices: + # The stream is contiguous ... just what we like! + return (mem, start_pos, size) + slices.append((start_pos, end_pos)) + return (b''.join([mem[start_pos:end_pos] for start_pos, end_pos in slices]), 0, size) diff --git a/tablib/packages/xlrd3/formatting.py b/tablib/packages/xlrd3/formatting.py new file mode 100644 index 0000000..85484da --- /dev/null +++ b/tablib/packages/xlrd3/formatting.py @@ -0,0 +1,1186 @@ +# Module for formatting information. +# +# Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd +# Copyright © 2005-2009 Stephen John Machin, Lingfo Pty Ltd +# This module is part of the xlrd3 package, which is released under +# a BSD-style licence. + +# No part of the content of this file was derived from the works of David Giffin. 
+ +# 2009-05-31 SJM Fixed problem with non-zero reserved bits in some STYLE records in Mac Excel files +# 2008-08-03 SJM Ignore PALETTE record when Book.formatting_info is false +# 2008-08-03 SJM Tolerate up to 4 bytes trailing junk on PALETTE record +# 2008-05-10 SJM Do some XF checks only when Book.formatting_info is true +# 2008-02-08 SJM Preparation for Excel 2.0 support +# 2008-02-03 SJM Another tweak to is_date_format_string() +# 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files. +# 2007-10-13 SJM Warning: style XF whose parent XF index != 0xFFF +# 2007-09-08 SJM Work around corrupt STYLE record +# 2007-07-11 SJM Allow for BIFF2/3-style FORMAT record in BIFF4/8 file + +DEBUG = False +import copy +import re +from struct import unpack + +from .biffh import BaseObject, unpack_unicode, unpack_string, \ + upkbits, upkbitsL, fprintf, \ + FUN, FDT, FNU, FGE, FTX, XL_CELL_NUMBER, XL_CELL_DATE, \ + XL_FORMAT, XL_FORMAT2, \ + XLRDError + + +excel_default_palette_b5 = ( + ( 0, 0, 0), (255, 255, 255), (255, 0, 0), ( 0, 255, 0), + ( 0, 0, 255), (255, 255, 0), (255, 0, 255), ( 0, 255, 255), + (128, 0, 0), ( 0, 128, 0), ( 0, 0, 128), (128, 128, 0), + (128, 0, 128), ( 0, 128, 128), (192, 192, 192), (128, 128, 128), + (153, 153, 255), (153, 51, 102), (255, 255, 204), (204, 255, 255), + (102, 0, 102), (255, 128, 128), ( 0, 102, 204), (204, 204, 255), + ( 0, 0, 128), (255, 0, 255), (255, 255, 0), ( 0, 255, 255), + (128, 0, 128), (128, 0, 0), ( 0, 128, 128), ( 0, 0, 255), + ( 0, 204, 255), (204, 255, 255), (204, 255, 204), (255, 255, 153), + (153, 204, 255), (255, 153, 204), (204, 153, 255), (227, 227, 227), + ( 51, 102, 255), ( 51, 204, 204), (153, 204, 0), (255, 204, 0), + (255, 153, 0), (255, 102, 0), (102, 102, 153), (150, 150, 150), + ( 0, 51, 102), ( 51, 153, 102), ( 0, 51, 0), ( 51, 51, 0), + (153, 51, 0), (153, 51, 102), ( 51, 51, 153), ( 51, 51, 51), + ) + +excel_default_palette_b2 = excel_default_palette_b5[:16] + +# Following two tables borrowed from Gnumeric 
1.4 source. +excel_default_palette_b5_gnumeric_14 = ( + #### dodgy; didn't match Excel results + ( 0, 0, 0), (255,255,255), (255, 0, 0), ( 0,255, 0), + ( 0, 0,255), (255,255, 0), (255, 0,255), ( 0,255,255), + (128, 0, 0), ( 0,128, 0), ( 0, 0,128), (128,128, 0), + (128, 0,128), ( 0,128,128), (192,192,192), (128,128,128), + (128,128,255), (128, 32, 96), (255,255,192), (160,224,224), + ( 96, 0,128), (255,128,128), ( 0,128,192), (192,192,255), + ( 0, 0,128), (255, 0,255), (255,255, 0), ( 0,255,255), + (128, 0,128), (128, 0, 0), ( 0,128,128), ( 0, 0,255), + ( 0,204,255), (105,255,255), (204,255,204), (255,255,153), + (166,202,240), (204,156,204), (204,153,255), (227,227,227), + ( 51,102,255), ( 51,204,204), ( 51,153, 51), (153,153, 51), + (153,102, 51), (153,102,102), (102,102,153), (150,150,150), + ( 51, 51,204), ( 51,102,102), ( 0, 51, 0), ( 51, 51, 0), + (102, 51, 0), (153, 51,102), ( 51, 51,153), ( 66, 66, 66), + ) +excel_default_palette_b8 = ( # (red, green, blue) + ( 0, 0, 0), (255,255,255), (255, 0, 0), ( 0,255, 0), + ( 0, 0,255), (255,255, 0), (255, 0,255), ( 0,255,255), + (128, 0, 0), ( 0,128, 0), ( 0, 0,128), (128,128, 0), + (128, 0,128), ( 0,128,128), (192,192,192), (128,128,128), + (153,153,255), (153, 51,102), (255,255,204), (204,255,255), + (102, 0,102), (255,128,128), ( 0,102,204), (204,204,255), + ( 0, 0,128), (255, 0,255), (255,255, 0), ( 0,255,255), + (128, 0,128), (128, 0, 0), ( 0,128,128), ( 0, 0,255), + ( 0,204,255), (204,255,255), (204,255,204), (255,255,153), + (153,204,255), (255,153,204), (204,153,255), (255,204,153), + ( 51,102,255), ( 51,204,204), (153,204, 0), (255,204, 0), + (255,153, 0), (255,102, 0), (102,102,153), (150,150,150), + ( 0, 51,102), ( 51,153,102), ( 0, 51, 0), ( 51, 51, 0), + (153, 51, 0), (153, 51,102), ( 51, 51,153), ( 51, 51, 51), + ) + +default_palette = { + 80: excel_default_palette_b8, + 70: excel_default_palette_b5, + 50: excel_default_palette_b5, + 45: excel_default_palette_b2, + 40: excel_default_palette_b2, + 30: 
excel_default_palette_b2, + 21: excel_default_palette_b2, + 20: excel_default_palette_b2, + } + +""" +00H = Normal +01H = RowLevel_lv (see next field) +02H = ColLevel_lv (see next field) +03H = Comma +04H = Currency +05H = Percent +06H = Comma [0] (BIFF4-BIFF8) +07H = Currency [0] (BIFF4-BIFF8) +08H = Hyperlink (BIFF8) +09H = Followed Hyperlink (BIFF8) +""" +built_in_style_names = [ + "Normal", + "RowLevel_", + "ColLevel_", + "Comma", + "Currency", + "Percent", + "Comma [0]", + "Currency [0]", + "Hyperlink", + "Followed Hyperlink", + ] + +def initialise_colour_map(book): + book.colour_map = {} + book.colour_indexes_used = {} + if not book.formatting_info: + return + # Add the 8 invariant colours + for i in range(8): + book.colour_map[i] = excel_default_palette_b8[i] + # Add the default palette depending on the version + dpal = default_palette[book.biff_version] + ndpal = len(dpal) + for i in range(ndpal): + book.colour_map[i+8] = dpal[i] + # Add the specials -- None means the RGB value is not known + # System window text colour for border lines + book.colour_map[ndpal+8] = None + # System window background colour for pattern background + book.colour_map[ndpal+8+1] = None # + for ci in ( + 0x51, # System ToolTip text colour (used in note objects) + 0x7FFF, # 32767, system window text colour for fonts + ): + book.colour_map[ci] = None + +def nearest_colour_index(colour_map, rgb, debug=0): + # General purpose function. Uses Euclidean distance. + # So far used only for pre-BIFF8 WINDOW2 record. + # Doesn't have to be fast. + # Doesn't have to be fancy. 
+ best_metric = 3 * 256 * 256 + best_colourx = 0 + for colourx, cand_rgb in list(colour_map.items()): + if cand_rgb is None: + continue + metric = 0 + for v1, v2 in zip(rgb, cand_rgb): + metric += (v1 - v2) * (v1 - v2) + if metric < best_metric: + best_metric = metric + best_colourx = colourx + if metric == 0: + break + if debug: + print("nearest_colour_index for %r is %r -> %r; best_metric is %d" \ + % (rgb, best_colourx, colour_map[best_colourx], best_metric)) + return best_colourx + +# This mixin class exists solely so that Format, Font, and XF.... objects +# can be compared by value of their attributes. +class EqNeAttrs(object): + + def __eq__(self, other): + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + return self.__dict__ != other.__dict__ + +# An Excel "font" contains the details of not only what is normally +# considered a font, but also several other display attributes. +# Items correspond to those in the Excel UI's Format/Cells/Font tab. +# - New in version 0.6.1 +class Font(BaseObject, EqNeAttrs): + # 1 = Characters are bold. Redundant; see "weight" attribute. + bold = 0 + + # Values: 0 = ANSI Latin, 1 = System default, 2 = Symbol, + # 77 = Apple Roman, + # 128 = ANSI Japanese Shift-JIS, + # 129 = ANSI Korean (Hangul), + # 130 = ANSI Korean (Johab), + # 134 = ANSI Chinese Simplified GBK, + # 136 = ANSI Chinese Traditional BIG5, + # 161 = ANSI Greek, + # 162 = ANSI Turkish, + # 163 = ANSI Vietnamese, + # 177 = ANSI Hebrew, + # 178 = ANSI Arabic, + # 186 = ANSI Baltic, + # 204 = ANSI Cyrillic, + # 222 = ANSI Thai, + # 238 = ANSI Latin II (Central European), + # 255 = OEM Latin I + character_set = 0 + # An explanation of "colour index" is given in the Formatting + # section at the start of this document. + colour_index = 0 + # 1 = Superscript, 2 = Subscript. + escapement = 0 + # 0 = None (unknown or don't care)
+ # 1 = Roman (variable width, serifed)
+ # 2 = Swiss (variable width, sans-serifed)
+ # 3 = Modern (fixed width, serifed or sans-serifed)
+ # 4 = Script (cursive)
+ # 5 = Decorative (specialised, for example Old English, Fraktur) + family = 0 + # The 0-based index used to refer to this Font() instance. + # Note that index 4 is never used; xlrd supplies a dummy place-holder. + font_index = 0 + # Height of the font (in twips). A twip = 1/20 of a point. + height = 0 + # 1 = Characters are italic. + italic = 0 + # The name of the font. Example: u"Arial" + name = "" + # 1 = Characters are struck out. + struck_out = 0 + # 0 = None
+ # 1 = Single; 0x21 (33) = Single accounting
+ # 2 = Double; 0x22 (34) = Double accounting + underline_type = 0 + # 1 = Characters are underlined. Redundant; see "underline_type" attribute. + underlined = 0 + # Font weight (100-1000). Standard values are 400 for normal text + # and 700 for bold text. + weight = 400 + # 1 = Font is outline style (Macintosh only) + outline = 0 + # 1 = Font is shadow style (Macintosh only) + shadow = 0 + + # No methods ... + +def handle_efont(book, data): # BIFF2 only + if not book.formatting_info: + return + book.font_list[-1].colour_index = unpack('= 2 + bv = book.biff_version + k = len(book.font_list) + if k == 4: + f = Font() + f.name = 'Dummy Font' + f.font_index = k + book.font_list.append(f) + k += 1 + f = Font() + f.font_index = k + book.font_list.append(f) + if bv >= 50: + ( + f.height, option_flags, f.colour_index, f.weight, + f.escapement_type, f.underline_type, f.family, + f.character_set, + ) = unpack('> 1 + f.underlined = (option_flags & 4) >> 2 + f.struck_out = (option_flags & 8) >> 3 + f.outline = (option_flags & 16) >> 4 + f.shadow = (option_flags & 32) >> 5 + if bv >= 80: + f.name = unpack_unicode(data, 14, lenlen=1) + else: + f.name = unpack_string(data, 14, book.encoding, lenlen=1) + elif bv >= 30: + f.height, option_flags, f.colour_index = unpack('> 1 + f.underlined = (option_flags & 4) >> 2 + f.struck_out = (option_flags & 8) >> 3 + f.outline = (option_flags & 16) >> 4 + f.shadow = (option_flags & 32) >> 5 + f.name = unpack_string(data, 6, book.encoding, lenlen=1) + # Now cook up the remaining attributes ... 
+ f.weight = [400, 700][f.bold] + f.escapement_type = 0 # None + f.underline_type = f.underlined # None or Single + f.family = 0 # Unknown / don't care + f.character_set = 1 # System default (0 means "ANSI Latin") + else: # BIFF2 + f.height, option_flags = unpack('> 1 + f.underlined = (option_flags & 4) >> 2 + f.struck_out = (option_flags & 8) >> 3 + f.outline = 0 + f.shadow = 0 + f.name = unpack_string(data, 4, book.encoding, lenlen=1) + # Now cook up the remaining attributes ... + f.weight = [400, 700][f.bold] + f.escapement_type = 0 # None + f.underline_type = f.underlined # None or Single + f.family = 0 # Unknown / don't care + f.character_set = 1 # System default (0 means "ANSI Latin") + if verbose: + f.dump( + book.logfile, + header="--- handle_font: font[%d] ---" % f.font_index, + footer="-------------------", + ) + +# === "Number formats" === + +# "Number format" information from a FORMAT record. +# - New in version 0.6.1 +class Format(BaseObject, EqNeAttrs): + + # The key into Book.format_map + format_key = 0 + + # A classification that has been inferred from the format string. + # Currently, this is used only to distinguish between numbers and dates. + # Values: + # FUN = 0 # unknown + # FDT = 1 # date + # FNU = 2 # number + # FGE = 3 # general + # FTX = 4 # text + type = FUN + + # The format string + format_str = '' + + def __init__(self, format_key, ty, format_str): + self.format_key = format_key + self.type = ty + self.format_str = format_str + +std_format_strings = { + # "std" == "standard for US English locale" + # #### TODO ... a lot of work to tailor these to the user's locale. + # See e.g. 
gnumeric-1.x.y/src/formats.c + 0x00: "General", + 0x01: "0", + 0x02: "0.00", + 0x03: "#,##0", + 0x04: "#,##0.00", + 0x05: "$#,##0_);($#,##0)", + 0x06: "$#,##0_);[Red]($#,##0)", + 0x07: "$#,##0.00_);($#,##0.00)", + 0x08: "$#,##0.00_);[Red]($#,##0.00)", + 0x09: "0%", + 0x0a: "0.00%", + 0x0b: "0.00E+00", + 0x0c: "# ?/?", + 0x0d: "# ??/??", + 0x0e: "m/d/yy", + 0x0f: "d-mmm-yy", + 0x10: "d-mmm", + 0x11: "mmm-yy", + 0x12: "h:mm AM/PM", + 0x13: "h:mm:ss AM/PM", + 0x14: "h:mm", + 0x15: "h:mm:ss", + 0x16: "m/d/yy h:mm", + 0x25: "#,##0_);(#,##0)", + 0x26: "#,##0_);[Red](#,##0)", + 0x27: "#,##0.00_);(#,##0.00)", + 0x28: "#,##0.00_);[Red](#,##0.00)", + 0x29: "_(* #,##0_);_(* (#,##0);_(* \"-\"_);_(@_)", + 0x2a: "_($* #,##0_);_($* (#,##0);_($* \"-\"_);_(@_)", + 0x2b: "_(* #,##0.00_);_(* (#,##0.00);_(* \"-\"??_);_(@_)", + 0x2c: "_($* #,##0.00_);_($* (#,##0.00);_($* \"-\"??_);_(@_)", + 0x2d: "mm:ss", + 0x2e: "[h]:mm:ss", + 0x2f: "mm:ss.0", + 0x30: "##0.0E+0", + 0x31: "@", + } + +fmt_code_ranges = [ # both-inclusive ranges of "standard" format codes + # Source: the openoffice.org doc't + ( 0, 0, FGE), + ( 1, 13, FNU), + (14, 22, FDT), + #### (27, 36, FDT), # Japanese dates -- not sure of reliability of this + (37, 44, FNU), + (45, 47, FDT), + (48, 48, FNU), + (49, 49, FTX), + ####(50, 58, FDT), # Japanese dates -- but Gnumeric assumes + # built-in formats finish at 49, not at 163 + ] + +std_format_code_types = {} +for lo, hi, ty in fmt_code_ranges: + for x in range(lo, hi+1): + std_format_code_types[x] = ty +del lo, hi, ty, x + +date_chars = 'ymdhs' # year, month/minute, day, hour, second +date_char_dict = {} +for _c in date_chars + date_chars.upper(): + date_char_dict[_c] = 5 +del _c, date_chars + +#(to_py3): +skip_char_dict = frozenset('$-+/(): ') + +num_char_dict = { + '0': 5, + '#': 5, + '?': 5, + } + +non_date_formats = { + '0.00E+00':1, + '##0.0E+0':1, + 'General' :1, + 'GENERAL' :1, # OOo Calc 1.1.4 does this. + 'general' :1, # pyExcelerator 0.6.3 does this. 
+ '@' :1, + } + +fmt_bracketed_sub = re.compile(r'\[[^]]*\]').sub + +# Boolean format strings (actual cases) +# u'"Yes";"Yes";"No"' +# u'"True";"True";"False"' +# u'"On";"On";"Off"' + +def is_date_format_string(book, fmt): + # Heuristics: + # Ignore "text" and [stuff in square brackets (aarrgghh -- see below)]. + # Handle backslashed-escaped chars properly. + # E.g. hh\hmm\mss\s should produce a display like 23h59m59s + # Date formats have one or more of ymdhs (caseless) in them. + # Numeric formats have # and 0. + # N.B. u'General"."' hence get rid of "text" first. + # TODO: Find where formats are interpreted in Gnumeric + # TODO: u'[h]\\ \\h\\o\\u\\r\\s' ([h] means don't care about hours > 23) + state = 0 + s = '' + # (to_py3): ignorable = skip_char_dict.has_key + for c in fmt: + if state == 0: + if c == '"': + state = 1 + elif c in r"\_*": + state = 2 + elif c in skip_char_dict: # (to_py3): + pass + else: + s += c + elif state == 1: + if c == '"': + state = 0 + elif state == 2: + # Ignore char after backslash, underscore or asterisk + state = 0 + assert 0 <= state <= 2 + if book.verbosity >= 4: + print("is_date_format_string: reduced format is %r" % s) + s = fmt_bracketed_sub('', s) + if s in non_date_formats: + return False + state = 0 + separator = ";" + got_sep = 0 + date_count = num_count = 0 + for c in s: + if c in date_char_dict: + date_count += date_char_dict[c] + elif c in num_char_dict: + num_count += num_char_dict[c] + elif c == separator: + got_sep = 1 + # print num_count, date_count, repr(fmt) + if date_count and not num_count: + return True + if num_count and not date_count: + return False + if date_count: + fprintf(book.logfile, + 'WARNING *** is_date_format: ambiguous d=%d n=%d fmt=%r\n', + date_count, num_count, fmt) + elif not got_sep: + fprintf(book.logfile, + "WARNING *** format %r produces constant result\n", + fmt) + return date_count > num_count + +def handle_format(self, data, rectype=XL_FORMAT): + DEBUG = 0 + bv = self.biff_version + if 
rectype == XL_FORMAT2: + bv = min(bv, 30) + if not self.encoding: + self.derive_encoding() + strpos = 2 + if bv >= 50: + fmtkey = unpack('= 80: + unistrg = unpack_unicode(data, 2) + else: + unistrg = unpack_string(data, strpos, self.encoding, lenlen=1) + verbose = DEBUG or self.verbosity >= 3 + if verbose: + fprintf(self.logfile, + "FORMAT: count=%d fmtkey=0x%04x (%d) s=%r\n", + self.actualfmtcount, fmtkey, fmtkey, unistrg) + is_date_s = self.is_date_format_string(unistrg) + ty = [FGE, FDT][is_date_s] + if not(fmtkey > 163 or bv < 50): + # user_defined if fmtkey > 163 + # N.B. Gnumeric incorrectly starts these at 50 instead of 164 :-( + # if earlier than BIFF 5, standard info is useless + std_ty = std_format_code_types.get(fmtkey, FUN) + # print "std ty", std_ty + is_date_c = std_ty == FDT + if 0 < fmtkey < 50 and (is_date_c ^ is_date_s): + DEBUG = 2 + fprintf(self.logfile, + "WARNING *** Conflict between " + "std format key %d and its format string %r\n", + fmtkey, unistrg) + if DEBUG == 2: + fprintf(self.logfile, + "ty: %d; is_date_c: %r; is_date_s: %r; fmt_strg: %r", + ty, is_date_c, is_date_s, unistrg) + fmtobj = Format(fmtkey, ty, unistrg) + if verbose: + fmtobj.dump(self.logfile, + header="--- handle_format [%d] ---" % (self.actualfmtcount-1, )) + self.format_map[fmtkey] = fmtobj + self.format_list.append(fmtobj) + +# ============================================================================= + +def handle_palette(book, data): + if not book.formatting_info: + return + verbose = DEBUG or book.verbosity >= 2 + n_colours, = unpack('= 50] + if ((DEBUG or book.verbosity >= 1) + and n_colours != expected_n_colours): + fprintf(book.logfile, + "NOTE *** Expected %d colours in PALETTE record, found %d\n", + expected_n_colours, n_colours) + elif verbose: + fprintf(book.logfile, + "PALETTE record with %d colours\n", n_colours) + fmt = '> 8) & 0xff + blue = (c >> 16) & 0xff + old_rgb = book.colour_map[8+i] + new_rgb = (red, green, blue) + 
book.palette_record.append(new_rgb) + book.colour_map[8+i] = new_rgb + if verbose: + if new_rgb != old_rgb: + print("%2d: %r -> %r" % (i, old_rgb, new_rgb), file=book.logfile) + +def palette_epilogue(book): + # Check colour indexes in fonts etc. + # This must be done here as FONT records + # come *before* the PALETTE record :-( + for font in book.font_list: + if font.font_index == 4: # the missing font record + continue + cx = font.colour_index + if cx == 0x7fff: # system window text colour + continue + if cx in book.colour_map: + book.colour_indexes_used[cx] = 1 + else: + print("Size of colour table:", len(book.colour_map)) + print("*** Font #%d (%r): colour index 0x%04x is unknown" \ + % (font.font_index, font.name, cx), file=book.logfile) + if book.verbosity >= 1: + used = list(book.colour_indexes_used.keys()) + used.sort() + print("\nColour indexes used:\n%r\n" % used, file=book.logfile) + +def handle_style(book, data): + verbose = DEBUG or book.verbosity >= 2 + bv = book.biff_version + flag_and_xfx, built_in_id, level = unpack('= 80: + name = unpack_unicode(data, 2, lenlen=2) + else: + name = unpack_string(data, 2, book.encoding, lenlen=1) + if verbose and not name: + print("WARNING *** A user-defined style has a zero-length name", file=book.logfile) + built_in = 0 + built_in_id = 0 + level = 0 + book.style_name_map[name] = (built_in, xf_index) + if verbose: + print("STYLE: built_in=%d xf_index=%d built_in_id=%d level=%d name=%r" \ + % (built_in, xf_index, built_in_id, level, name), file=book.logfile) + +def check_colour_indexes_in_obj(book, obj, orig_index): + alist = list(obj.__dict__.items()) + alist.sort() + for attr, nobj in alist: + if hasattr(nobj, 'dump'): + check_colour_indexes_in_obj(book, nobj, orig_index) + elif attr.find('colour_index') >= 0: + if nobj in book.colour_map: + book.colour_indexes_used[nobj] = 1 + continue + oname = obj.__class__.__name__ + print("*** xf #%d : %s.%s = 0x%04x (unknown)" \ + % (orig_index, oname, attr, nobj), 
file=book.logfile) + +def handle_xf(self, data): + ### self is a Book instance + # DEBUG = 0 + verbose = DEBUG or self.verbosity >= 3 + bv = self.biff_version + xf = XF() + xf.alignment = XFAlignment() + xf.alignment.indent_level = 0 + xf.alignment.shrink_to_fit = 0 + xf.alignment.text_direction = 0 + xf.border = XFBorder() + xf.border.diag_up = 0 + xf.border.diag_down = 0 + xf.border.diag_colour_index = 0 + xf.border.diag_line_style = 0 # no line + xf.background = XFBackground() + xf.protection = XFProtection() + # fill in the known standard formats + if bv >= 50 and not self.xfcount: + # i.e. do this once before we process the first XF record + for x in list(std_format_code_types.keys()): + if x not in self.format_map: + ty = std_format_code_types[x] + fmt_str = std_format_strings[x] + fmtobj = Format(x, ty, fmt_str) + self.format_map[x] = fmtobj + if bv >= 80: + unpack_fmt = '> 2 + for attr_stem in \ + "format font alignment border background protection".split(): + attr = "_" + attr_stem + "_flag" + setattr(xf, attr, reg & 1) + reg >>= 1 + upkbitsL(xf.border, pkd_brdbkg1, ( + (0, 0x0000000f, 'left_line_style'), + (4, 0x000000f0, 'right_line_style'), + (8, 0x00000f00, 'top_line_style'), + (12, 0x0000f000, 'bottom_line_style'), + (16, 0x007f0000, 'left_colour_index'), + (23, 0x3f800000, 'right_colour_index'), + (30, 0x40000000, 'diag_down'), + (31, 0x80000000, 'diag_up'), + )) + upkbits(xf.border, pkd_brdbkg2, ( + (0, 0x0000007F, 'top_colour_index'), + (7, 0x00003F80, 'bottom_colour_index'), + (14, 0x001FC000, 'diag_colour_index'), + (21, 0x01E00000, 'diag_line_style'), + )) + upkbitsL(xf.background, pkd_brdbkg2, ( + (26, 0xFC000000, 'fill_pattern'), + )) + upkbits(xf.background, pkd_brdbkg3, ( + (0, 0x007F, 'pattern_colour_index'), + (7, 0x3F80, 'background_colour_index'), + )) + elif bv >= 50: + unpack_fmt = '> 2 + for attr_stem in \ + "format font alignment border background protection".split(): + attr = "_" + attr_stem + "_flag" + setattr(xf, attr, reg & 1) + 
reg >>= 1 + upkbitsL(xf.background, pkd_brdbkg1, ( + ( 0, 0x0000007F, 'pattern_colour_index'), + ( 7, 0x00003F80, 'background_colour_index'), + (16, 0x003F0000, 'fill_pattern'), + )) + upkbitsL(xf.border, pkd_brdbkg1, ( + (22, 0x01C00000, 'bottom_line_style'), + (25, 0xFE000000, 'bottom_colour_index'), + )) + upkbits(xf.border, pkd_brdbkg2, ( + ( 0, 0x00000007, 'top_line_style'), + ( 3, 0x00000038, 'left_line_style'), + ( 6, 0x000001C0, 'right_line_style'), + ( 9, 0x0000FE00, 'top_colour_index'), + (16, 0x007F0000, 'left_colour_index'), + (23, 0x3F800000, 'right_colour_index'), + )) + elif bv >= 40: + unpack_fmt = '> 6 + xf.alignment.rotation = [0, 255, 90, 180][orientation] + reg = pkd_used >> 2 + for attr_stem in \ + "format font alignment border background protection".split(): + attr = "_" + attr_stem + "_flag" + setattr(xf, attr, reg & 1) + reg >>= 1 + upkbits(xf.background, pkd_bkg_34, ( + ( 0, 0x003F, 'fill_pattern'), + ( 6, 0x07C0, 'pattern_colour_index'), + (11, 0xF800, 'background_colour_index'), + )) + upkbitsL(xf.border, pkd_brd_34, ( + ( 0, 0x00000007, 'top_line_style'), + ( 3, 0x000000F8, 'top_colour_index'), + ( 8, 0x00000700, 'left_line_style'), + (11, 0x0000F800, 'left_colour_index'), + (16, 0x00070000, 'bottom_line_style'), + (19, 0x00F80000, 'bottom_colour_index'), + (24, 0x07000000, 'right_line_style'), + (27, 0xF8000000, 'right_colour_index'), + )) + elif bv == 30: + unpack_fmt = '> 2 + for attr_stem in \ + "format font alignment border background protection".split(): + attr = "_" + attr_stem + "_flag" + setattr(xf, attr, reg & 1) + reg >>= 1 + upkbits(xf.background, pkd_bkg_34, ( + ( 0, 0x003F, 'fill_pattern'), + ( 6, 0x07C0, 'pattern_colour_index'), + (11, 0xF800, 'background_colour_index'), + )) + upkbitsL(xf.border, pkd_brd_34, ( + ( 0, 0x00000007, 'top_line_style'), + ( 3, 0x000000F8, 'top_colour_index'), + ( 8, 0x00000700, 'left_line_style'), + (11, 0x0000F800, 'left_colour_index'), + (16, 0x00070000, 'bottom_line_style'), + (19, 
0x00F80000, 'bottom_colour_index'), + (24, 0x07000000, 'right_line_style'), + (27, 0xF8000000, 'right_colour_index'), + )) + xf.alignment.vert_align = 2 # bottom + xf.alignment.rotation = 0 + elif bv == 21: + #### Warning: incomplete treatment; formatting_info not fully supported. + #### Probably need to offset incoming BIFF2 XF[n] to BIFF8-like XF[n+16], + #### and create XF[0:16] like the standard ones in BIFF8 + #### *AND* add 16 to all XF references in cell records :-( + (xf.font_index, format_etc, halign_etc) = unpack('= 3 + verbose1 = DEBUG or self.verbosity >= 1 + if verbose: + fprintf(self.logfile, "xf_epilogue called ...\n") + + def check_same(book_arg, xf_arg, parent_arg, attr): + # the _arg caper is to avoid a Warning msg from Python 2.1 :-( + if getattr(xf_arg, attr) != getattr(parent_arg, attr): + fprintf(book_arg.logfile, + "NOTE !!! XF[%d] parent[%d] %s different\n", + xf_arg.xf_index, parent_arg.xf_index, attr) + + for xfx in range(num_xfs): + xf = self.xf_list[xfx] + if xf.format_key not in self.format_map: + msg = "ERROR *** XF[%d] unknown format key (%d, 0x%04x)\n" + fprintf(self.logfile, msg, + xf.xf_index, xf.format_key, xf.format_key) + xf.format_key = 0 + cellty_from_fmtty = { + FNU: XL_CELL_NUMBER, + FUN: XL_CELL_NUMBER, + FGE: XL_CELL_NUMBER, + FDT: XL_CELL_DATE, + FTX: XL_CELL_NUMBER, # Yes, a number can be formatted as text. 
+ } + fmt = self.format_map[xf.format_key] + cellty = cellty_from_fmtty[fmt.type] + self._xf_index_to_xl_type_map[xf.xf_index] = cellty + # Now for some assertions etc + if not self.formatting_info: + continue + if xf.is_style: + continue + if not(0 <= xf.parent_style_index < num_xfs): + fprintf(self.logfile, + "WARNING *** XF[%d]: is_style=%d but parent_style_index=%d\n", + xf.xf_index, xf.is_style, xf.parent_style_index) + # make it conform + xf.parent_style_index = 0 + if self.biff_version >= 30: + assert xf.parent_style_index != xf.xf_index + assert self.xf_list[xf.parent_style_index].is_style + if verbose1 and xf.parent_style_index > xf.xf_index: + fprintf(self.logfile, + "NOTE !!! XF[%d]: parent_style_index is %d; out of order?\n", + xf.xf_index, xf.parent_style_index) + parent = self.xf_list[xf.parent_style_index] + if not xf._alignment_flag and not parent._alignment_flag: + if verbose1: check_same(self, xf, parent, 'alignment') + if not xf._background_flag and not parent._background_flag: + if verbose1: check_same(self, xf, parent, 'background') + if not xf._border_flag and not parent._border_flag: + if verbose1: check_same(self, xf, parent, 'border') + if not xf._protection_flag and not parent._protection_flag: + if verbose1: check_same(self, xf, parent, 'protection') + if not xf._format_flag and not parent._format_flag: + if verbose1 and xf.format_key != parent.format_key: + fprintf(self.logfile, + "NOTE !!! XF[%d] fmtk=%d, parent[%d] fmtk=%r\n%r / %r\n", + xf.xf_index, xf.format_key, parent.xf_index, parent.format_key, + self.format_map[xf.format_key].format_str, + self.format_map[parent.format_key].format_str) + if not xf._font_flag and not parent._font_flag: + if verbose1 and xf.font_index != parent.font_index: + fprintf(self.logfile, + "NOTE !!! 
XF[%d] fontx=%d, parent[%d] fontx=%r\n", + xf.xf_index, xf.font_index, parent.xf_index, parent.font_index) + +def initialise_book(book): + initialise_colour_map(book) + book._xf_epilogue_done = 0 + methods = ( + handle_font, + handle_efont, + handle_format, + is_date_format_string, + handle_palette, + palette_epilogue, + handle_style, + handle_xf, + xf_epilogue, + ) + for method in methods: + setattr(book.__class__, method.__name__, method) + +class XFBorder(BaseObject, EqNeAttrs): + """ A collection of the border-related attributes of an XF record. + + Items correspond to those in the Excel UI's Format/Cells/Border tab. + An explanations of "colour index" is given in the Formatting + section at the start of this document. + There are five line style attributes; possible values and the + associated meanings are: + + 0 = No line, + 1 = Thin, + 2 = Medium, + 3 = Dashed, + 4 = Dotted, + 5 = Thick, + 6 = Double, + 7 = Hair, + 8 = Medium dashed, + 9 = Thin dash-dotted, + 10 = Medium dash-dotted, + 11 = Thin dash-dot-dotted, + 12 = Medium dash-dot-dotted, + 13 = Slanted medium dash-dotted. + The line styles 8 to 13 appear in BIFF8 files (Excel 97 and later) only. + For pictures of the line styles, refer to OOo docs s3.10 (p22) + "Line Styles for Cell Borders (BIFF3-BIFF8)".

+ - New in version 0.6.1 + """ + + # The colour index for the cell's top line + top_colour_index = 0 + # The colour index for the cell's bottom line + bottom_colour_index = 0 + # The colour index for the cell's left line + left_colour_index = 0 + # The colour index for the cell's right line + right_colour_index = 0 + # The colour index for the cell's diagonal lines, if any + diag_colour_index = 0 + # The line style for the cell's top line + top_line_style = 0 + # The line style for the cell's bottom line + bottom_line_style = 0 + # The line style for the cell's left line + left_line_style = 0 + # The line style for the cell's right line + right_line_style = 0 + # The line style for the cell's diagonal lines, if any + diag_line_style = 0 + # 1 = draw a diagonal from top left to bottom right + diag_down = 0 + # 1 = draw a diagonal from bottom left to top right + diag_up = 0 + +# New in version 0.6.1 +class XFBackground(BaseObject, EqNeAttrs): + """ A collection of the background-related attributes of an XF record. + + Items correspond to those in the Excel UI's Format/Cells/Patterns tab. + An explanation of "colour index" is given in the Formatting + section at the start of this document. + + """ + # See section 3.11 of the OOo docs. + fill_pattern = 0 + # See section 3.11 of the OOo docs. + background_colour_index = 0 + # See section 3.11 of the OOo docs. + pattern_colour_index = 0 + +# New in version 0.6.1 +class XFAlignment(BaseObject, EqNeAttrs): + """ A collection of the alignment and similar attributes of an XF record. + + Items correspond to those in the Excel UI's Format/Cells/Alignment tab. + """ + # Values: section 5.115 (p 219) of OOo docs + hor_align = 0 + # Values: section 5.115 (p 220) of OOo docs + vert_align = 0 + # Values: section 5.115 (p 220) of OOo docs. + # Note: file versions BIFF7 and earlier use the documented + # "orientation" attribute; this will be mapped (without loss) + # into "rotation". 
+ rotation = 0 + # 1 = text is wrapped at right margin + text_wrapped = 0 + # A number in range(15). + indent_level = 0 + # 1 = shrink font size to fit text into cell. + shrink_to_fit = 0 + # 0 = according to context; 1 = left-to-right; 2 = right-to-left + text_direction = 0 + +# New in version 0.6.1 +class XFProtection(BaseObject, EqNeAttrs): + """ A collection of the protection-related attributes of an XF record. + + Items correspond to those in the Excel UI's Format/Cells/Protection tab. + Note the OOo docs include the "cell or style" bit + in this bundle of attributes. + This is incorrect; the bit is used in determining which bundles to use. + + """ + # 1 = Cell is prevented from being changed, moved, resized, or deleted + # (only if the sheet is protected). + cell_locked = 0 + # 1 = Hide formula so that it doesn't appear in the formula bar when + # the cell is selected (only if the sheet is protected). + formula_hidden = 0 + +# New in version 0.6.1 +class XF(BaseObject): + """ eXtended Formatting information for cells, rows, columns and styles. + + Each of the 6 flags below describes the validity of + a specific group of attributes. + + In cell XFs, flag==0 means the attributes of the parent style XF are used, + (but only if the attributes are valid there); flag==1 means the attributes + of this XF are used. + In style XFs, flag==0 means the attribute setting is valid; flag==1 means + the attribute should be ignored. + Note that the API + provides both "raw" XFs and "computed" XFs -- in the latter case, cell XFs + have had the above inheritance mechanism applied. 
+ """ + # 0 = cell XF, 1 = style XF + is_style = 0 + # cell XF: Index into Book.xf_list + # of this XF's style XF + # style XF: 0xFFF + parent_style_index = 0 + _format_flag = 0 + _font_flag = 0 + _alignment_flag = 0 + _border_flag = 0 + _background_flag = 0 + _protection_flag = 0 + # Index into Book.xf_list + xf_index = 0 + # Index into Book.font_list + font_index = 0 + # Key into Book.format_map + # + # Warning: OOo docs on the XF record call this "Index to FORMAT record". + # It is not an index in the Python sense. It is a key to a map. + # It is true **only** for Excel 4.0 and earlier files + # that the key into format_map from an XF instance + # is the same as the index into format_list, and **only** + # if the index is less than 164. + # + format_key = 0 + # An instance of an XFProtection object. + protection = None + # An instance of an XFBackground object. + background = None + # An instance of an XFAlignment object. + alignment = None + # An instance of an XFBorder object. + border = None diff --git a/tablib/packages/xlrd3/formula.py b/tablib/packages/xlrd3/formula.py new file mode 100644 index 0000000..445d761 --- /dev/null +++ b/tablib/packages/xlrd3/formula.py @@ -0,0 +1,2083 @@ +# Module for parsing/evaluating Microsoft Excel formulas. +# +# Copyright © 2005-2009 Stephen John Machin, Lingfo Pty Ltd +# This module is part of the xlrd3 package, which is released under +# a BSD-style licence. + +# No part of the content of this file was derived from the works of David Giffin. + +import copy +from struct import unpack + +from .biffh import unpack_unicode_update_pos, unpack_string_update_pos, \ + XLRDError, hex_char_dump, error_text_from_code, BaseObject + +__all__ = [ + 'oBOOL', 'oERR', 'oNUM', 'oREF', 'oREL', 'oSTRG', 'oUNK', + 'decompile_formula', + 'dump_formula', + 'evaluate_name_formula', + 'okind_dict', + 'rangename3d', 'rangename3drel', 'cellname', 'cellnameabs', 'colname', + ] + +# sztabN[opcode] -> the number of bytes to consume. 
+# -1 means variable +# -2 means this opcode not implemented in this version. +# Which N to use? Depends on biff_version; see szdict. +sztab0 = [-2, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2, -1, 8, 4, 2, 2, 3, 9, 8, 2, 3, 8, 4, 7, 5, 5, 5, 2, 4, 7, 4, 7, 2, 2, -2, -2, -2, -2, -2, -2, -2, -2, 3, -2, -2, -2, -2, -2, -2, -2] +sztab1 = [-2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2, -1, 11, 5, 2, 2, 3, 9, 9, 2, 3, 11, 4, 7, 7, 7, 7, 3, 4, 7, 4, 7, 3, 3, -2, -2, -2, -2, -2, -2, -2, -2, 3, -2, -2, -2, -2, -2, -2, -2] +sztab2 = [-2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2, -1, 11, 5, 2, 2, 3, 9, 9, 3, 4, 11, 4, 7, 7, 7, 7, 3, 4, 7, 4, 7, 3, 3, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2] +sztab3 = [-2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2, -1, -2, -2, 2, 2, 3, 9, 9, 3, 4, 15, 4, 7, 7, 7, 7, 3, 4, 7, 4, 7, 3, 3, -2, -2, -2, -2, -2, -2, -2, -2, -2, 25, 18, 21, 18, 21, -2, -2] +sztab4 = [-2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -2, -2, 2, 2, 3, 9, 9, 3, 4, 5, 5, 9, 7, 7, 7, 3, 5, 9, 5, 9, 3, 3, -2, -2, -2, -2, -2, -2, -2, -2, -2, 7, 7, 11, 7, 11, -2, -2] + +szdict = { + 20 : sztab0, + 21 : sztab0, # Suppose 21 is same as 20.... + 30 : sztab1, + 40 : sztab2, + 45 : sztab2, + 50 : sztab3, + 70 : sztab3, + 80 : sztab4, + } + +# For debugging purposes ... 
the name for each opcode +# (without the prefix "t" used on OOo docs) +onames = ['Unk00', 'Exp', 'Tbl', 'Add', 'Sub', 'Mul', 'Div', 'Power', 'Concat', 'LT', 'LE', 'EQ', 'GE', 'GT', 'NE', 'Isect', 'List', 'Range', 'Uplus', 'Uminus', 'Percent', 'Paren', 'MissArg', 'Str', 'Extended', 'Attr', 'Sheet', 'EndSheet', 'Err', 'Bool', 'Int', 'Num', 'Array', 'Func', 'FuncVar', 'Name', 'Ref', 'Area', 'MemArea', 'MemErr', 'MemNoMem', 'MemFunc', 'RefErr', 'AreaErr', 'RefN', 'AreaN', 'MemAreaN', 'MemNoMemN', '', '', '', '', '', '', '', '', 'FuncCE', 'NameX', 'Ref3d', 'Area3d', 'RefErr3d', 'AreaErr3d', '', ''] + +func_defs = { + # index: (name, min#args, max#args, flags, #known_args, return_type, kargs) + 0 : ('COUNT', 0, 30, 0x04, 1, 'V', 'R'), + 1 : ('IF', 2, 3, 0x04, 3, 'V', 'VRR'), + 2 : ('ISNA', 1, 1, 0x02, 1, 'V', 'V'), + 3 : ('ISERROR', 1, 1, 0x02, 1, 'V', 'V'), + 4 : ('SUM', 0, 30, 0x04, 1, 'V', 'R'), + 5 : ('AVERAGE', 1, 30, 0x04, 1, 'V', 'R'), + 6 : ('MIN', 1, 30, 0x04, 1, 'V', 'R'), + 7 : ('MAX', 1, 30, 0x04, 1, 'V', 'R'), + 8 : ('ROW', 0, 1, 0x04, 1, 'V', 'R'), + 9 : ('COLUMN', 0, 1, 0x04, 1, 'V', 'R'), + 10 : ('NA', 0, 0, 0x02, 0, 'V', ''), + 11 : ('NPV', 2, 30, 0x04, 2, 'V', 'VR'), + 12 : ('STDEV', 1, 30, 0x04, 1, 'V', 'R'), + 13 : ('DOLLAR', 1, 2, 0x04, 1, 'V', 'V'), + 14 : ('FIXED', 2, 3, 0x04, 3, 'V', 'VVV'), + 15 : ('SIN', 1, 1, 0x02, 1, 'V', 'V'), + 16 : ('COS', 1, 1, 0x02, 1, 'V', 'V'), + 17 : ('TAN', 1, 1, 0x02, 1, 'V', 'V'), + 18 : ('ATAN', 1, 1, 0x02, 1, 'V', 'V'), + 19 : ('PI', 0, 0, 0x02, 0, 'V', ''), + 20 : ('SQRT', 1, 1, 0x02, 1, 'V', 'V'), + 21 : ('EXP', 1, 1, 0x02, 1, 'V', 'V'), + 22 : ('LN', 1, 1, 0x02, 1, 'V', 'V'), + 23 : ('LOG10', 1, 1, 0x02, 1, 'V', 'V'), + 24 : ('ABS', 1, 1, 0x02, 1, 'V', 'V'), + 25 : ('INT', 1, 1, 0x02, 1, 'V', 'V'), + 26 : ('SIGN', 1, 1, 0x02, 1, 'V', 'V'), + 27 : ('ROUND', 2, 2, 0x02, 2, 'V', 'VV'), + 28 : ('LOOKUP', 2, 3, 0x04, 2, 'V', 'VR'), + 29 : ('INDEX', 2, 4, 0x0c, 4, 'R', 'RVVV'), + 30 : ('REPT', 2, 2, 0x02, 2, 'V', 
'VV'), + 31 : ('MID', 3, 3, 0x02, 3, 'V', 'VVV'), + 32 : ('LEN', 1, 1, 0x02, 1, 'V', 'V'), + 33 : ('VALUE', 1, 1, 0x02, 1, 'V', 'V'), + 34 : ('TRUE', 0, 0, 0x02, 0, 'V', ''), + 35 : ('FALSE', 0, 0, 0x02, 0, 'V', ''), + 36 : ('AND', 1, 30, 0x04, 1, 'V', 'R'), + 37 : ('OR', 1, 30, 0x04, 1, 'V', 'R'), + 38 : ('NOT', 1, 1, 0x02, 1, 'V', 'V'), + 39 : ('MOD', 2, 2, 0x02, 2, 'V', 'VV'), + 40 : ('DCOUNT', 3, 3, 0x02, 3, 'V', 'RRR'), + 41 : ('DSUM', 3, 3, 0x02, 3, 'V', 'RRR'), + 42 : ('DAVERAGE', 3, 3, 0x02, 3, 'V', 'RRR'), + 43 : ('DMIN', 3, 3, 0x02, 3, 'V', 'RRR'), + 44 : ('DMAX', 3, 3, 0x02, 3, 'V', 'RRR'), + 45 : ('DSTDEV', 3, 3, 0x02, 3, 'V', 'RRR'), + 46 : ('VAR', 1, 30, 0x04, 1, 'V', 'R'), + 47 : ('DVAR', 3, 3, 0x02, 3, 'V', 'RRR'), + 48 : ('TEXT', 2, 2, 0x02, 2, 'V', 'VV'), + 49 : ('LINEST', 1, 4, 0x04, 4, 'A', 'RRVV'), + 50 : ('TREND', 1, 4, 0x04, 4, 'A', 'RRRV'), + 51 : ('LOGEST', 1, 4, 0x04, 4, 'A', 'RRVV'), + 52 : ('GROWTH', 1, 4, 0x04, 4, 'A', 'RRRV'), + 56 : ('PV', 3, 5, 0x04, 5, 'V', 'VVVVV'), + 57 : ('FV', 3, 5, 0x04, 5, 'V', 'VVVVV'), + 58 : ('NPER', 3, 5, 0x04, 5, 'V', 'VVVVV'), + 59 : ('PMT', 3, 5, 0x04, 5, 'V', 'VVVVV'), + 60 : ('RATE', 3, 6, 0x04, 6, 'V', 'VVVVVV'), + 61 : ('MIRR', 3, 3, 0x02, 3, 'V', 'RVV'), + 62 : ('IRR', 1, 2, 0x04, 2, 'V', 'RV'), + 63 : ('RAND', 0, 0, 0x0a, 0, 'V', ''), + 64 : ('MATCH', 2, 3, 0x04, 3, 'V', 'VRR'), + 65 : ('DATE', 3, 3, 0x02, 3, 'V', 'VVV'), + 66 : ('TIME', 3, 3, 0x02, 3, 'V', 'VVV'), + 67 : ('DAY', 1, 1, 0x02, 1, 'V', 'V'), + 68 : ('MONTH', 1, 1, 0x02, 1, 'V', 'V'), + 69 : ('YEAR', 1, 1, 0x02, 1, 'V', 'V'), + 70 : ('WEEKDAY', 1, 2, 0x04, 2, 'V', 'VV'), + 71 : ('HOUR', 1, 1, 0x02, 1, 'V', 'V'), + 72 : ('MINUTE', 1, 1, 0x02, 1, 'V', 'V'), + 73 : ('SECOND', 1, 1, 0x02, 1, 'V', 'V'), + 74 : ('NOW', 0, 0, 0x0a, 0, 'V', ''), + 75 : ('AREAS', 1, 1, 0x02, 1, 'V', 'R'), + 76 : ('ROWS', 1, 1, 0x02, 1, 'V', 'R'), + 77 : ('COLUMNS', 1, 1, 0x02, 1, 'V', 'R'), + 78 : ('OFFSET', 3, 5, 0x04, 5, 'R', 'RVVVV'), + 82 : ('SEARCH', 2, 
3, 0x04, 3, 'V', 'VVV'), + 83 : ('TRANSPOSE', 1, 1, 0x02, 1, 'A', 'A'), + 86 : ('TYPE', 1, 1, 0x02, 1, 'V', 'V'), + 92 : ('SERIESSUM', 4, 4, 0x02, 4, 'V', 'VVVA'), + 97 : ('ATAN2', 2, 2, 0x02, 2, 'V', 'VV'), + 98 : ('ASIN', 1, 1, 0x02, 1, 'V', 'V'), + 99 : ('ACOS', 1, 1, 0x02, 1, 'V', 'V'), + 100: ('CHOOSE', 2, 30, 0x04, 2, 'V', 'VR'), + 101: ('HLOOKUP', 3, 4, 0x04, 4, 'V', 'VRRV'), + 102: ('VLOOKUP', 3, 4, 0x04, 4, 'V', 'VRRV'), + 105: ('ISREF', 1, 1, 0x02, 1, 'V', 'R'), + 109: ('LOG', 1, 2, 0x04, 2, 'V', 'VV'), + 111: ('CHAR', 1, 1, 0x02, 1, 'V', 'V'), + 112: ('LOWER', 1, 1, 0x02, 1, 'V', 'V'), + 113: ('UPPER', 1, 1, 0x02, 1, 'V', 'V'), + 114: ('PROPER', 1, 1, 0x02, 1, 'V', 'V'), + 115: ('LEFT', 1, 2, 0x04, 2, 'V', 'VV'), + 116: ('RIGHT', 1, 2, 0x04, 2, 'V', 'VV'), + 117: ('EXACT', 2, 2, 0x02, 2, 'V', 'VV'), + 118: ('TRIM', 1, 1, 0x02, 1, 'V', 'V'), + 119: ('REPLACE', 4, 4, 0x02, 4, 'V', 'VVVV'), + 120: ('SUBSTITUTE', 3, 4, 0x04, 4, 'V', 'VVVV'), + 121: ('CODE', 1, 1, 0x02, 1, 'V', 'V'), + 124: ('FIND', 2, 3, 0x04, 3, 'V', 'VVV'), + 125: ('CELL', 1, 2, 0x0c, 2, 'V', 'VR'), + 126: ('ISERR', 1, 1, 0x02, 1, 'V', 'V'), + 127: ('ISTEXT', 1, 1, 0x02, 1, 'V', 'V'), + 128: ('ISNUMBER', 1, 1, 0x02, 1, 'V', 'V'), + 129: ('ISBLANK', 1, 1, 0x02, 1, 'V', 'V'), + 130: ('T', 1, 1, 0x02, 1, 'V', 'R'), + 131: ('N', 1, 1, 0x02, 1, 'V', 'R'), + 140: ('DATEVALUE', 1, 1, 0x02, 1, 'V', 'V'), + 141: ('TIMEVALUE', 1, 1, 0x02, 1, 'V', 'V'), + 142: ('SLN', 3, 3, 0x02, 3, 'V', 'VVV'), + 143: ('SYD', 4, 4, 0x02, 4, 'V', 'VVVV'), + 144: ('DDB', 4, 5, 0x04, 5, 'V', 'VVVVV'), + 148: ('INDIRECT', 1, 2, 0x0c, 2, 'R', 'VV'), + 162: ('CLEAN', 1, 1, 0x02, 1, 'V', 'V'), + 163: ('MDETERM', 1, 1, 0x02, 1, 'V', 'A'), + 164: ('MINVERSE', 1, 1, 0x02, 1, 'A', 'A'), + 165: ('MMULT', 2, 2, 0x02, 2, 'A', 'AA'), + 167: ('IPMT', 4, 6, 0x04, 6, 'V', 'VVVVVV'), + 168: ('PPMT', 4, 6, 0x04, 6, 'V', 'VVVVVV'), + 169: ('COUNTA', 0, 30, 0x04, 1, 'V', 'R'), + 183: ('PRODUCT', 0, 30, 0x04, 1, 'V', 'R'), + 184: ('FACT', 
1, 1, 0x02, 1, 'V', 'V'), + 189: ('DPRODUCT', 3, 3, 0x02, 3, 'V', 'RRR'), + 190: ('ISNONTEXT', 1, 1, 0x02, 1, 'V', 'V'), + 193: ('STDEVP', 1, 30, 0x04, 1, 'V', 'R'), + 194: ('VARP', 1, 30, 0x04, 1, 'V', 'R'), + 195: ('DSTDEVP', 3, 3, 0x02, 3, 'V', 'RRR'), + 196: ('DVARP', 3, 3, 0x02, 3, 'V', 'RRR'), + 197: ('TRUNC', 1, 2, 0x04, 2, 'V', 'VV'), + 198: ('ISLOGICAL', 1, 1, 0x02, 1, 'V', 'V'), + 199: ('DCOUNTA', 3, 3, 0x02, 3, 'V', 'RRR'), + 204: ('USDOLLAR', 1, 2, 0x04, 2, 'V', 'VV'), + 205: ('FINDB', 2, 3, 0x04, 3, 'V', 'VVV'), + 206: ('SEARCHB', 2, 3, 0x04, 3, 'V', 'VVV'), + 207: ('REPLACEB', 4, 4, 0x02, 4, 'V', 'VVVV'), + 208: ('LEFTB', 1, 2, 0x04, 2, 'V', 'VV'), + 209: ('RIGHTB', 1, 2, 0x04, 2, 'V', 'VV'), + 210: ('MIDB', 3, 3, 0x02, 3, 'V', 'VVV'), + 211: ('LENB', 1, 1, 0x02, 1, 'V', 'V'), + 212: ('ROUNDUP', 2, 2, 0x02, 2, 'V', 'VV'), + 213: ('ROUNDDOWN', 2, 2, 0x02, 2, 'V', 'VV'), + 214: ('ASC', 1, 1, 0x02, 1, 'V', 'V'), + 215: ('DBCS', 1, 1, 0x02, 1, 'V', 'V'), + 216: ('RANK', 2, 3, 0x04, 3, 'V', 'VRV'), + 219: ('ADDRESS', 2, 5, 0x04, 5, 'V', 'VVVVV'), + 220: ('DAYS360', 2, 3, 0x04, 3, 'V', 'VVV'), + 221: ('TODAY', 0, 0, 0x0a, 0, 'V', ''), + 222: ('VDB', 5, 7, 0x04, 7, 'V', 'VVVVVVV'), + 227: ('MEDIAN', 1, 30, 0x04, 1, 'V', 'R'), + 228: ('SUMPRODUCT', 1, 30, 0x04, 1, 'V', 'A'), + 229: ('SINH', 1, 1, 0x02, 1, 'V', 'V'), + 230: ('COSH', 1, 1, 0x02, 1, 'V', 'V'), + 231: ('TANH', 1, 1, 0x02, 1, 'V', 'V'), + 232: ('ASINH', 1, 1, 0x02, 1, 'V', 'V'), + 233: ('ACOSH', 1, 1, 0x02, 1, 'V', 'V'), + 234: ('ATANH', 1, 1, 0x02, 1, 'V', 'V'), + 235: ('DGET', 3, 3, 0x02, 3, 'V', 'RRR'), + 244: ('INFO', 1, 1, 0x02, 1, 'V', 'V'), + 247: ('DB', 4, 5, 0x04, 5, 'V', 'VVVVV'), + 252: ('FREQUENCY', 2, 2, 0x02, 2, 'A', 'RR'), + 261: ('ERROR.TYPE', 1, 1, 0x02, 1, 'V', 'V'), + 269: ('AVEDEV', 1, 30, 0x04, 1, 'V', 'R'), + 270: ('BETADIST', 3, 5, 0x04, 1, 'V', 'V'), + 271: ('GAMMALN', 1, 1, 0x02, 1, 'V', 'V'), + 272: ('BETAINV', 3, 5, 0x04, 1, 'V', 'V'), + 273: ('BINOMDIST', 4, 4, 0x02, 4, 
'V', 'VVVV'), + 274: ('CHIDIST', 2, 2, 0x02, 2, 'V', 'VV'), + 275: ('CHIINV', 2, 2, 0x02, 2, 'V', 'VV'), + 276: ('COMBIN', 2, 2, 0x02, 2, 'V', 'VV'), + 277: ('CONFIDENCE', 3, 3, 0x02, 3, 'V', 'VVV'), + 278: ('CRITBINOM', 3, 3, 0x02, 3, 'V', 'VVV'), + 279: ('EVEN', 1, 1, 0x02, 1, 'V', 'V'), + 280: ('EXPONDIST', 3, 3, 0x02, 3, 'V', 'VVV'), + 281: ('FDIST', 3, 3, 0x02, 3, 'V', 'VVV'), + 282: ('FINV', 3, 3, 0x02, 3, 'V', 'VVV'), + 283: ('FISHER', 1, 1, 0x02, 1, 'V', 'V'), + 284: ('FISHERINV', 1, 1, 0x02, 1, 'V', 'V'), + 285: ('FLOOR', 2, 2, 0x02, 2, 'V', 'VV'), + 286: ('GAMMADIST', 4, 4, 0x02, 4, 'V', 'VVVV'), + 287: ('GAMMAINV', 3, 3, 0x02, 3, 'V', 'VVV'), + 288: ('CEILING', 2, 2, 0x02, 2, 'V', 'VV'), + 289: ('HYPGEOMDIST', 4, 4, 0x02, 4, 'V', 'VVVV'), + 290: ('LOGNORMDIST', 3, 3, 0x02, 3, 'V', 'VVV'), + 291: ('LOGINV', 3, 3, 0x02, 3, 'V', 'VVV'), + 292: ('NEGBINOMDIST', 3, 3, 0x02, 3, 'V', 'VVV'), + 293: ('NORMDIST', 4, 4, 0x02, 4, 'V', 'VVVV'), + 294: ('NORMSDIST', 1, 1, 0x02, 1, 'V', 'V'), + 295: ('NORMINV', 3, 3, 0x02, 3, 'V', 'VVV'), + 296: ('NORMSINV', 1, 1, 0x02, 1, 'V', 'V'), + 297: ('STANDARDIZE', 3, 3, 0x02, 3, 'V', 'VVV'), + 298: ('ODD', 1, 1, 0x02, 1, 'V', 'V'), + 299: ('PERMUT', 2, 2, 0x02, 2, 'V', 'VV'), + 300: ('POISSON', 3, 3, 0x02, 3, 'V', 'VVV'), + 301: ('TDIST', 3, 3, 0x02, 3, 'V', 'VVV'), + 302: ('WEIBULL', 4, 4, 0x02, 4, 'V', 'VVVV'), + 303: ('SUMXMY2', 2, 2, 0x02, 2, 'V', 'AA'), + 304: ('SUMX2MY2', 2, 2, 0x02, 2, 'V', 'AA'), + 305: ('SUMX2PY2', 2, 2, 0x02, 2, 'V', 'AA'), + 306: ('CHITEST', 2, 2, 0x02, 2, 'V', 'AA'), + 307: ('CORREL', 2, 2, 0x02, 2, 'V', 'AA'), + 308: ('COVAR', 2, 2, 0x02, 2, 'V', 'AA'), + 309: ('FORECAST', 3, 3, 0x02, 3, 'V', 'VAA'), + 310: ('FTEST', 2, 2, 0x02, 2, 'V', 'AA'), + 311: ('INTERCEPT', 2, 2, 0x02, 2, 'V', 'AA'), + 312: ('PEARSON', 2, 2, 0x02, 2, 'V', 'AA'), + 313: ('RSQ', 2, 2, 0x02, 2, 'V', 'AA'), + 314: ('STEYX', 2, 2, 0x02, 2, 'V', 'AA'), + 315: ('SLOPE', 2, 2, 0x02, 2, 'V', 'AA'), + 316: ('TTEST', 4, 4, 0x02, 4, 
'V', 'AAVV'), + 317: ('PROB', 3, 4, 0x04, 3, 'V', 'AAV'), + 318: ('DEVSQ', 1, 30, 0x04, 1, 'V', 'R'), + 319: ('GEOMEAN', 1, 30, 0x04, 1, 'V', 'R'), + 320: ('HARMEAN', 1, 30, 0x04, 1, 'V', 'R'), + 321: ('SUMSQ', 0, 30, 0x04, 1, 'V', 'R'), + 322: ('KURT', 1, 30, 0x04, 1, 'V', 'R'), + 323: ('SKEW', 1, 30, 0x04, 1, 'V', 'R'), + 324: ('ZTEST', 2, 3, 0x04, 2, 'V', 'RV'), + 325: ('LARGE', 2, 2, 0x02, 2, 'V', 'RV'), + 326: ('SMALL', 2, 2, 0x02, 2, 'V', 'RV'), + 327: ('QUARTILE', 2, 2, 0x02, 2, 'V', 'RV'), + 328: ('PERCENTILE', 2, 2, 0x02, 2, 'V', 'RV'), + 329: ('PERCENTRANK', 2, 3, 0x04, 2, 'V', 'RV'), + 330: ('MODE', 1, 30, 0x04, 1, 'V', 'A'), + 331: ('TRIMMEAN', 2, 2, 0x02, 2, 'V', 'RV'), + 332: ('TINV', 2, 2, 0x02, 2, 'V', 'VV'), + 336: ('CONCATENATE', 0, 30, 0x04, 1, 'V', 'V'), + 337: ('POWER', 2, 2, 0x02, 2, 'V', 'VV'), + 342: ('RADIANS', 1, 1, 0x02, 1, 'V', 'V'), + 343: ('DEGREES', 1, 1, 0x02, 1, 'V', 'V'), + 344: ('SUBTOTAL', 2, 30, 0x04, 2, 'V', 'VR'), + 345: ('SUMIF', 2, 3, 0x04, 3, 'V', 'RVR'), + 346: ('COUNTIF', 2, 2, 0x02, 2, 'V', 'RV'), + 347: ('COUNTBLANK', 1, 1, 0x02, 1, 'V', 'R'), + 350: ('ISPMT', 4, 4, 0x02, 4, 'V', 'VVVV'), + 351: ('DATEDIF', 3, 3, 0x02, 3, 'V', 'VVV'), + 352: ('DATESTRING', 1, 1, 0x02, 1, 'V', 'V'), + 353: ('NUMBERSTRING', 2, 2, 0x02, 2, 'V', 'VV'), + 354: ('ROMAN', 1, 2, 0x04, 2, 'V', 'VV'), + 358: ('GETPIVOTDATA', 2, 2, 0x02, 2, 'V', 'RV'), + 359: ('HYPERLINK', 1, 2, 0x04, 2, 'V', 'VV'), + 360: ('PHONETIC', 1, 1, 0x02, 1, 'V', 'V'), + 361: ('AVERAGEA', 1, 30, 0x04, 1, 'V', 'R'), + 362: ('MAXA', 1, 30, 0x04, 1, 'V', 'R'), + 363: ('MINA', 1, 30, 0x04, 1, 'V', 'R'), + 364: ('STDEVPA', 1, 30, 0x04, 1, 'V', 'R'), + 365: ('VARPA', 1, 30, 0x04, 1, 'V', 'R'), + 366: ('STDEVA', 1, 30, 0x04, 1, 'V', 'R'), + 367: ('VARA', 1, 30, 0x04, 1, 'V', 'R'), + 368: ('BAHTTEXT', 1, 1, 0x02, 1, 'V', 'V'), + 369: ('THAIDAYOFWEEK', 1, 1, 0x02, 1, 'V', 'V'), + 370: ('THAIDIGIT', 1, 1, 0x02, 1, 'V', 'V'), + 371: ('THAIMONTHOFYEAR', 1, 1, 0x02, 1, 'V', 'V'), + 
372: ('THAINUMSOUND', 1, 1, 0x02, 1, 'V', 'V'), + 373: ('THAINUMSTRING', 1, 1, 0x02, 1, 'V', 'V'), + 374: ('THAISTRINGLENGTH', 1, 1, 0x02, 1, 'V', 'V'), + 375: ('ISTHAIDIGIT', 1, 1, 0x02, 1, 'V', 'V'), + 376: ('ROUNDBAHTDOWN', 1, 1, 0x02, 1, 'V', 'V'), + 377: ('ROUNDBAHTUP', 1, 1, 0x02, 1, 'V', 'V'), + 378: ('THAIYEAR', 1, 1, 0x02, 1, 'V', 'V'), + 379: ('RTD', 2, 5, 0x04, 1, 'V', 'V'), + } + +tAttrNames = { + 0x00: "Skip??", # seen in SAMPLES.XLS which shipped with Excel 5.0 + 0x01: "Volatile", + 0x02: "If", + 0x04: "Choose", + 0x08: "Skip", + 0x10: "Sum", + 0x20: "Assign", + 0x40: "Space", + 0x41: "SpaceVolatile", + } + +_error_opcodes = frozenset([0x07, 0x08, 0x0A, 0x0B, 0x1C, 0x1D, 0x2F]) + +tRangeFuncs = (min, max, min, max, min, max) +tIsectFuncs = (max, min, max, min, max, min) + +def do_box_funcs(box_funcs, boxa, boxb): + return tuple([ + func(numa, numb) + for func, numa, numb in zip(box_funcs, boxa.coords, boxb.coords) + ]) + +def adjust_cell_addr_biff8(rowval, colval, reldelta, browx=None, bcolx=None): + row_rel = (colval >> 15) & 1 + col_rel = (colval >> 14) & 1 + rowx = rowval + colx = colval & 0xff + if reldelta: + if row_rel and rowx >= 32768: + rowx -= 65536 + if col_rel and colx >= 128: + colx -= 256 + else: + if row_rel: + rowx -= browx + if col_rel: + colx -= bcolx + return rowx, colx, row_rel, col_rel + +def adjust_cell_addr_biff_le7( + rowval, colval, reldelta, browx=None, bcolx=None): + row_rel = (rowval >> 15) & 1 + col_rel = (rowval >> 14) & 1 + rowx = rowval & 0x3fff + colx = colval + if reldelta: + if row_rel and rowx >= 8192: + rowx -= 16384 + if col_rel and colx >= 128: + colx -= 256 + else: + if row_rel: + rowx -= browx + if col_rel: + colx -= bcolx + return rowx, colx, row_rel, col_rel + +def get_cell_addr(data, pos, bv, reldelta, browx=None, bcolx=None): + if bv >= 80: + rowval, colval = unpack("= 80: + row1val, row2val, col1val, col2val = unpack(" addins %r" % (refx, info)) + assert ref_first_sheetx == 0xFFFE == ref_last_sheetx + 
return (-5, -5) + if ref_recordx != bk._supbook_locals_inx: + if verbose: + print("/// get_externsheet_local_range(refx=%d) -> external %r" % (refx, info)) + return (-4, -4) # external reference + if ref_first_sheetx == 0xFFFE == ref_last_sheetx: + if verbose: + print("/// get_externsheet_local_range(refx=%d) -> unspecified sheet %r" % (refx, info)) + return (-1, -1) # internal reference, any sheet + if ref_first_sheetx == 0xFFFF == ref_last_sheetx: + if verbose: + print("/// get_externsheet_local_range(refx=%d) -> deleted sheet(s)" % (refx, )) + return (-2, -2) # internal reference, deleted sheet(s) + nsheets = len(bk._all_sheets_map) + if not(0 <= ref_first_sheetx <= ref_last_sheetx < nsheets): + if verbose: + print("/// get_externsheet_local_range(refx=%d) -> %r" % (refx, info)) + print("--- first/last sheet not in range(%d)" % nsheets) + return (-102, -102) # stuffed up somewhere :-( + xlrd_sheetx1 = bk._all_sheets_map[ref_first_sheetx] + xlrd_sheetx2 = bk._all_sheets_map[ref_last_sheetx] + if not(0 <= xlrd_sheetx1 <= xlrd_sheetx2): + return (-3, -3) # internal reference, but to a macro sheet + return xlrd_sheetx1, xlrd_sheetx2 + +def get_externsheet_local_range_b57( + bk, raw_extshtx, ref_first_sheetx, ref_last_sheetx, verbose=0): + if raw_extshtx > 0: + if verbose: + print("/// get_externsheet_local_range_b57(raw_extshtx=%d) -> external" % raw_extshtx) + return (-4, -4) # external reference + if ref_first_sheetx == -1 and ref_last_sheetx == -1: + return (-2, -2) # internal reference, deleted sheet(s) + nsheets = len(bk._all_sheets_map) + if not(0 <= ref_first_sheetx <= ref_last_sheetx < nsheets): + if verbose: + print("/// get_externsheet_local_range_b57(%d, %d, %d) -> ???" 
\ + % (raw_extshtx, ref_first_sheetx, ref_last_sheetx)) + print("--- first/last sheet not in range(%d)" % nsheets) + return (-103, -103) # stuffed up somewhere :-( + xlrd_sheetx1 = bk._all_sheets_map[ref_first_sheetx] + xlrd_sheetx2 = bk._all_sheets_map[ref_last_sheetx] + if not(0 <= xlrd_sheetx1 <= xlrd_sheetx2): + return (-3, -3) # internal reference, but to a macro sheet + return xlrd_sheetx1, xlrd_sheetx2 + +class FormulaError(Exception): + pass + +oBOOL = 3 +oERR = 4 +oMSNG = 5 # tMissArg +oNUM = 2 +oREF = -1 +oREL = -2 +oSTRG = 1 +oUNK = 0 + +okind_dict = { + -2: "oREL", + -1: "oREF", + 0 : "oUNK", + 1 : "oSTRG", + 2 : "oNUM", + 3 : "oBOOL", + 4 : "oERR", + 5 : "oMSNG", + } + +listsep = ',' #### probably should depend on locale + +## +# Used in evaluating formulas. +# The following table describes the kinds and how their values +# are represented.

+#
+# Kind symbol | Kind number | Value representation
+# ------------+-------------+----------------------------------------------
+# oBOOL       |  3          | integer: 0 => False; 1 => True
+# oERR        |  4          | None, or an int error code (same as
+#             |             | XL_CELL_ERROR in the Cell class).
+# oMSNG       |  5          | Used by Excel as a placeholder for a missing
+#             |             | (not supplied) function argument. Should
+#             |             | *not* appear as a final formula result.
+#             |             | Value is None.
+# oNUM        |  2          | A float. Note that there is no way of
+#             |             | distinguishing dates.
+# oREF        | -1          | The value is either None or a non-empty list
+#             |             | of absolute Ref3D instances.
+# oREL        | -2          | The value is None or a non-empty list of
+#             |             | fully or partially relative Ref3D instances.
+# oSTRG       |  1          | A Unicode string.
+# oUNK        |  0          | The kind is unknown or ambiguous. The value
+#             |             | is None.
+#

+ +class Operand(object): + + ## + # None means that the actual value of the operand is a variable + # (depends on cell data), not a constant. + value = None + ## + # oUNK means that the kind of operand is not known unambiguously. + kind = oUNK + ## + # The reconstituted text of the original formula. Function names will be + # in English irrespective of the original language, which doesn't seem + # to be recorded anywhere. The separator is ",", not ";" or whatever else + # might be more appropriate for the end-user's locale; patches welcome. + text = '?' + + def __init__(self, akind=None, avalue=None, arank=0, atext='?'): + if akind is not None: + self.kind = akind + if avalue is not None: + self.value = avalue + self.rank = arank + # rank is an internal gizmo (operator precedence); + # it's used in reconstructing formula text. + self.text = atext + + def __repr__(self): + kind_text = okind_dict.get(self.kind, "?Unknown kind?") + return "Operand(kind=%s, value=%r, text=%r)" \ + % (kind_text, self.value, self.text) + +#(to_py3) if CAN_SUBCLASS_BUILTIN: +# _ref3d_base = tuple + +# Represents an absolute or relative 3-dimensional reference to a box +# of one or more cells. +# - New in version 0.6.0 +# +# The `coords` attribute is a tuple of the form: +# (shtxlo, shtxhi, rowxlo, rowxhi, colxlo, colxhi) +# where 0 <= thingxlo <= thingx < thingxhi. +# Note that it is quite possible to have thingx > nthings; for example +# Print_Titles could have colxhi == 256 and/or rowxhi == 65536 +# irrespective of how many columns/rows are actually used in the worksheet. +# The caller will need to decide how to handle this situation. +# Keyword: IndexError :-) +# +# The components of the coords attribute are also available as individual +# attributes: shtxlo, shtxhi, rowxlo, rowxhi, colxlo, and colxhi. +# +# The `relflags` attribute is a 6-tuple of flags which indicate whether +# the corresponding (sheet|row|col)(lo|hi) is relative (1) or absolute (0). 
+# Note that there is necessarily no information available as to what cell(s) +# the reference could possibly be relative to. The caller must decide what if +# any use to make of oREL operands. Note also that a partially relative +# reference may well be a typo. +# For example, define name A1Z10 as $a$1:$z10 (missing $ after z) +# while the cursor is on cell Sheet3!A27.
+# The resulting Ref3D instance will have coords = (2, 3, 0, -16, 0, 26) +# and relflags = (0, 0, 0, 1, 0, 0).
+# So far, only one possibility of a sheet-relative component in +# a reference has been noticed: a 2D reference located in the "current sheet". +# This will appear as coords = (0, 1, ...) and relflags = (1, 1, ...). + +class Ref3D(tuple): + + def __init__(self, atuple): + self.coords = atuple[0:6] + self.relflags = atuple[6:12] + if not self.relflags: + self.relflags = (0, 0, 0, 0, 0, 0) + (self.shtxlo, self.shtxhi, + self.rowxlo, self.rowxhi, + self.colxlo, self.colxhi) = self.coords + + def __repr__(self): + if not self.relflags or self.relflags == (0, 0, 0, 0, 0, 0): + return "Ref3D(coords=%r)" % (self.coords, ) + else: + return "Ref3D(coords=%r, relflags=%r)" \ + % (self.coords, self.relflags) + +tAdd = 0x03 +tSub = 0x04 +tMul = 0x05 +tDiv = 0x06 +tPower = 0x07 +tConcat = 0x08 +tLT, tLE, tEQ, tGE, tGT, tNE = list(range(0x09, 0x0F)) + +import operator as opr + +def nop(x): + return x + +def _opr_pow(x, y): return x ** y + +def _opr_lt(x, y): return x < y +def _opr_le(x, y): return x <= y +def _opr_eq(x, y): return x == y +def _opr_ge(x, y): return x >= y +def _opr_gt(x, y): return x > y +def _opr_ne(x, y): return x != y + +def num2strg(num): + """Attempt to emulate Excel's default conversion + from number to string. + """ + s = str(num) + if s.endswith(".0"): + s = s[:-2] + return s + +_arith_argdict = {oNUM: nop, oSTRG: float} +_cmp_argdict = {oNUM: nop, oSTRG: nop} +# Seems no conversions done on relops; in Excel, "1" > 9 produces TRUE. 
+_strg_argdict = {oNUM:num2strg, oSTRG:nop} +binop_rules = { + tAdd: (_arith_argdict, oNUM, opr.add, 30, '+'), + tSub: (_arith_argdict, oNUM, opr.sub, 30, '-'), + tMul: (_arith_argdict, oNUM, opr.mul, 40, '*'), + tDiv: (_arith_argdict, oNUM, opr.truediv, 40, '/'), + tPower: (_arith_argdict, oNUM, _opr_pow, 50, '^',), + tConcat:(_strg_argdict, oSTRG, opr.add, 20, '&'), + tLT: (_cmp_argdict, oBOOL, _opr_lt, 10, '<'), + tLE: (_cmp_argdict, oBOOL, _opr_le, 10, '<='), + tEQ: (_cmp_argdict, oBOOL, _opr_eq, 10, '='), + tGE: (_cmp_argdict, oBOOL, _opr_ge, 10, '>='), + tGT: (_cmp_argdict, oBOOL, _opr_gt, 10, '>'), + tNE: (_cmp_argdict, oBOOL, _opr_ne, 10, '<>'), + } + +unop_rules = { + 0x13: (lambda x: -x, 70, '-', ''), # unary minus + 0x12: (lambda x: x, 70, '+', ''), # unary plus + 0x14: (lambda x: x / 100.0, 60, '', '%'),# percent + } + +LEAF_RANK = 90 +FUNC_RANK = 90 + +STACK_ALARM_LEVEL = 5 +STACK_PANIC_LEVEL = 10 + +def evaluate_name_formula(bk, nobj, namex, verbose=0, level=0): + if level > STACK_ALARM_LEVEL: + verbose = 1 + data = nobj.raw_formula + fmlalen = nobj.basic_formula_len + bv = bk.biff_version + reldelta = 1 # All defined name formulas use "Method B" [OOo docs] + if verbose: + print("::: evaluate_name_formula %r %r %d %d %r level=%d" \ + % (namex, nobj.name, fmlalen, bv, data, level)) + hex_char_dump(data, 0, fmlalen) + if level > STACK_PANIC_LEVEL: + raise XLRDError("Excessive indirect references in NAME formula") + sztab = szdict[bv] + pos = 0 + stack = [] + any_rel = 0 + any_err = 0 + any_external = 0 + unk_opnd = Operand(oUNK, None) + error_opnd = Operand(oERR, None) + spush = stack.append + + def do_binop(opcd, stk): + assert len(stk) >= 2 + bop = stk.pop() + aop = stk.pop() + argdict, result_kind, func, rank, sym = binop_rules[opcd] + otext = ''.join([ + '('[:aop.rank < rank], + aop.text, + ')'[:aop.rank < rank], + sym, + '('[:bop.rank < rank], + bop.text, + ')'[:bop.rank < rank], + ]) + resop = Operand(result_kind, None, rank, otext) + try: + bconv 
= argdict[bop.kind] + aconv = argdict[aop.kind] + except KeyError: + stk.append(resop) + return + if bop.value is None or aop.value is None: + stk.append(resop) + return + bval = bconv(bop.value) + aval = aconv(aop.value) + result = func(aval, bval) + if result_kind == oBOOL: + result = 1 if result else 0 # (to_py3) + resop.value = result + stk.append(resop) + + def do_unaryop(opcode, arglist, result_kind, stk): + assert len(stk) >= 1 + aop = stk.pop() + assert aop.kind in arglist + val = aop.value + func, rank, sym1, sym2 = unop_rules[opcode] + otext = ''.join([ + sym1, + '('[:aop.rank < rank], + aop.text, + ')'[:aop.rank < rank], + sym2, + ]) + if val is not None: + val = func(val) + stk.append(Operand(result_kind, val, rank, otext)) + + def not_in_name_formula(op_arg, oname_arg): + msg = "ERROR *** Token 0x%02x (%s) found in NAME formula" \ + % (op_arg, oname_arg) + raise FormulaError(msg) + + if fmlalen == 0: + stack = [unk_opnd] + + while 0 <= pos < fmlalen: + op = data[pos] + opcode = op & 0x1f + optype = (op & 0x60) >> 5 + if optype: + opx = opcode + 32 + else: + opx = opcode + oname = onames[opx] # + [" RVA"][optype] + sz = sztab[opx] + if verbose: + print("Pos:%d Op:0x%02x Name:t%s Sz:%d opcode:%02xh optype:%02xh" \ + % (pos, op, oname, sz, opcode, optype)) + print("Stack =", stack) + if sz == -2: + msg = 'ERROR *** Unexpected token 0x%02x ("%s"); biff_version=%d' \ + % (op, oname, bv) + raise FormulaError(msg) + if not optype: + if 0x00 <= opcode <= 0x02: # unk_opnd, tExp, tTbl + not_in_name_formula(op, oname) + elif 0x03 <= opcode <= 0x0E: + # Add, Sub, Mul, Div, Power + # tConcat + # tLT, ..., tNE + do_binop(opcode, stack) + elif opcode == 0x0F: # tIsect + if verbose: print("tIsect pre", stack, file=bk.logfile) + assert len(stack) >= 2 + bop = stack.pop() + aop = stack.pop() + sym = ' ' + rank = 80 ########## check ####### + otext = ''.join([ + '('[:aop.rank < rank], + aop.text, + ')'[:aop.rank < rank], + sym, + '('[:bop.rank < rank], + bop.text, + 
')'[:bop.rank < rank], + ]) + res = Operand(oREF) + res.text = otext + if bop.kind == oERR or aop.kind == oERR: + res.kind = oERR + elif bop.kind == oUNK or aop.kind == oUNK: + # This can happen with undefined + # (go search in the current sheet) labels. + # For example =Bob Sales + # Each label gets a NAME record with an empty formula (!) + # Evaluation of the tName token classifies it as oUNK + # res.kind = oREF + pass + elif bop.kind == oREF == aop.kind: + if aop.value is not None and bop.value is not None: + assert len(aop.value) == 1 + assert len(bop.value) == 1 + coords = do_box_funcs( + tIsectFuncs, aop.value[0], bop.value[0]) + res.value = [Ref3D(coords)] + elif bop.kind == oREL == aop.kind: + res.kind = oREL + if aop.value is not None and bop.value is not None: + assert len(aop.value) == 1 + assert len(bop.value) == 1 + coords = do_box_funcs( + tIsectFuncs, aop.value[0], bop.value[0]) + relfa = aop.value[0].relflags + relfb = bop.value[0].relflags + if relfa == relfb: + res.value = [Ref3D(coords + relfa)] + else: + pass + spush(res) + if verbose: print("tIsect post", stack, file=bk.logfile) + elif opcode == 0x10: # tList + if verbose: print("tList pre", stack, file=bk.logfile) + assert len(stack) >= 2 + bop = stack.pop() + aop = stack.pop() + sym = ',' + rank = 80 ########## check ####### + otext = ''.join([ + '('[:aop.rank < rank], + aop.text, + ')'[:aop.rank < rank], + sym, + '('[:bop.rank < rank], + bop.text, + ')'[:bop.rank < rank], + ]) + res = Operand(oREF, None, rank, otext) + if bop.kind == oERR or aop.kind == oERR: + res.kind = oERR + elif bop.kind in (oREF, oREL) and aop.kind in (oREF, oREL): + res.kind = oREF + if aop.kind == oREL or bop.kind == oREL: + res.kind = oREL + if aop.value is not None and bop.value is not None: + assert len(aop.value) >= 1 + assert len(bop.value) == 1 + res.value = aop.value + bop.value + else: + pass + spush(res) + if verbose: print("tList post", stack, file=bk.logfile) + elif opcode == 0x11: # tRange + if verbose: 
print("tRange pre", stack, file=bk.logfile) + assert len(stack) >= 2 + bop = stack.pop() + aop = stack.pop() + sym = ':' + rank = 80 ########## check ####### + otext = ''.join([ + '('[:aop.rank < rank], + aop.text, + ')'[:aop.rank < rank], + sym, + '('[:bop.rank < rank], + bop.text, + ')'[:bop.rank < rank], + ]) + res = Operand(oREF, None, rank, otext) + if bop.kind == oERR or aop.kind == oERR: + res = oERR + elif bop.kind == oREF == aop.kind: + if aop.value is not None and bop.value is not None: + assert len(aop.value) == 1 + assert len(bop.value) == 1 + coords = do_box_funcs( + tRangeFuncs, aop.value[0], bop.value[0]) + res.value = [Ref3D(coords)] + elif bop.kind == oREL == aop.kind: + res.kind = oREL + if aop.value is not None and bop.value is not None: + assert len(aop.value) == 1 + assert len(bop.value) == 1 + coords = do_box_funcs( + tRangeFuncs, aop.value[0], bop.value[0]) + relfa = aop.value[0].relflags + relfb = bop.value[0].relflags + if relfa == relfb: + res.value = [Ref3D(coords + relfa)] + else: + pass + spush(res) + if verbose: print("tRange post", stack, file=bk.logfile) + elif 0x12 <= opcode <= 0x14: # tUplus, tUminus, tPercent + do_unaryop(opcode, (oUNK, oNUM,), oNUM, stack) + elif opcode == 0x15: # tParen + # source cosmetics + pass + elif opcode == 0x16: # tMissArg + spush(Operand(oMSNG, None, LEAF_RANK, '')) + elif opcode == 0x17: # tStr + if bv <= 70: + strg, newpos = unpack_string_update_pos( + data, pos+1, bk.encoding, lenlen=1) + else: + strg, newpos = unpack_unicode_update_pos( + data, pos+1, lenlen=1) + sz = newpos - pos + if verbose: print(" sz=%d strg=%r" % (sz, strg), file=bk.logfile) + text = '"' + strg.replace('"', '""') + '"' + spush(Operand(oSTRG, strg, LEAF_RANK, text)) + elif opcode == 0x18: # tExtended + # new with BIFF 8 + assert bv >= 80 + # not in OOo docs + raise FormulaError("tExtended token not implemented") + elif opcode == 0x19: # tAttr + subop, nc = unpack("= 1 + aop = stack[-1] + otext = 'SUM(%s)' % aop.text + stack[-1] 
= Operand(oNUM, None, FUNC_RANK, otext) + else: + sz = 4 + if verbose: + print(" subop=%02xh subname=t%s sz=%d nc=%02xh" \ + % (subop, subname, sz, nc)) + elif 0x1A <= opcode <= 0x1B: # tSheet, tEndSheet + assert bv < 50 + raise FormulaError("tSheet & tEndsheet tokens not implemented") + elif 0x1C <= opcode <= 0x1F: # tErr, tBool, tInt, tNum + inx = opcode - 0x1C + nb = [1, 1, 2, 8][inx] + kind = [oERR, oBOOL, oNUM, oNUM][inx] + value, = unpack("<" + "BBHd"[inx], data[pos+1:pos+1+nb]) + if inx == 2: # tInt + value = float(value) + text = str(value) + elif inx == 3: # tNum + text = str(value) + elif inx == 1: # tBool + text = ('FALSE', 'TRUE')[value] + else: + text = '"' +error_text_from_code[value] + '"' + spush(Operand(kind, value, LEAF_RANK, text)) + else: + raise FormulaError("Unhandled opcode: 0x%02x" % opcode) + if sz <= 0: + raise FormulaError("Size not set for opcode 0x%02x" % opcode) + pos += sz + continue + if opcode == 0x00: # tArray + spush(unk_opnd) + elif opcode == 0x01: # tFunc + nb = 1 + int(bv >= 40) + funcx = unpack("<" + " BH"[nb], data[pos+1:pos+1+nb])[0] + func_attrs = func_defs.get(funcx, None) + if not func_attrs: + print("*** formula/tFunc unknown FuncID:%d" \ + % funcx, file=bk.logfile) + spush(unk_opnd) + else: + func_name, nargs = func_attrs[:2] + if verbose: + print(" FuncID=%d name=%s nargs=%d" \ + % (funcx, func_name, nargs)) + assert len(stack) >= nargs + argtext = listsep.join([arg.text for arg in stack[-nargs:]]) + otext = "%s(%s)" % (func_name, argtext) + del stack[-nargs:] + res = Operand(oUNK, None, FUNC_RANK, otext) + spush(res) + elif opcode == 0x02: #tFuncVar + nb = 1 + int(bv >= 40) + nargs, funcx = unpack("= nargs + assert len(stack) >= nargs + argtext = listsep.join([arg.text for arg in stack[-nargs:]]) + otext = "%s(%s)" % (func_name, argtext) + res = Operand(oUNK, None, FUNC_RANK, otext) + if funcx == 1: # IF + testarg = stack[-nargs] + if testarg.kind not in (oNUM, oBOOL): + if verbose and testarg.kind != oUNK: + 
print("IF testarg kind?") + elif testarg.value not in (0, 1): + if verbose and testarg.value is not None: + print("IF testarg value?") + else: + if nargs == 2 and not testarg.value: + # IF(FALSE, tv) => FALSE + res.kind, res.value = oBOOL, 0 + else: + respos = -nargs + 2 - int(testarg.value) + chosen = stack[respos] + if chosen.kind == oMSNG: + res.kind, res.value = oNUM, 0 + else: + res.kind, res.value = chosen.kind, chosen.value + if verbose: + print("$$$$$$ IF => constant") + elif funcx == 100: # CHOOSE + testarg = stack[-nargs] + if testarg.kind == oNUM: + if 1 <= testarg.value < nargs: + chosen = stack[-nargs + int(testarg.value)] + if chosen.kind == oMSNG: + res.kind, res.value = oNUM, 0 + else: + res.kind, res.value = chosen.kind, chosen.value + del stack[-nargs:] + spush(res) + elif opcode == 0x03: #tName + tgtnamex = unpack("> bk.logfile, " ", res + # spush(res) + elif opcode == 0x0D: #tAreaN + not_in_name_formula(op, oname) + # res = get_cell_range_addr(data, pos+1, bv, reldelta=1) + # # note *ALL* tAreaN usage has signed offset for relative addresses + # any_rel = 1 + # if verbose: print >> bk.logfile, " ", res + elif opcode == 0x1A: # tRef3d + if bv >= 80: + res = get_cell_addr(data, pos+3, bv, reldelta) + refx = unpack("= 80: + res1, res2 = get_cell_range_addr(data, pos+3, bv, reldelta) + refx = unpack("= 80: + refx, tgtnamex = unpack(" 0: + refx -= 1 + elif refx < 0: + refx = -refx - 1 + else: + dodgy = 1 + if verbose: + print(" origrefx=%d refx=%d tgtnamex=%d dodgy=%d" \ + % (origrefx, refx, tgtnamex, dodgy), file=bk.logfile) + if tgtnamex == namex: + if verbose: print("!!!! 
Self-referential !!!!", file=bk.logfile) + dodgy = any_err = 1 + if not dodgy: + if bv >= 80: + shx1, shx2 = get_externsheet_local_range(bk, refx, verbose) + elif origrefx > 0: + shx1, shx2 = (-4, -4) # external ref + else: + exty = bk._externsheet_type_b57[refx] + if exty == 4: # non-specific sheet in own doc't + shx1, shx2 = (-1, -1) # internal, any sheet + else: + shx1, shx2 = (-666, -666) + if dodgy or shx1 < -1: + otext = "<>" \ + % (tgtnamex, origrefx) + res = Operand(oUNK, None, LEAF_RANK, otext) + else: + tgtobj = bk.name_obj_list[tgtnamex] + if not tgtobj.evaluated: + ### recursive ### + evaluate_name_formula(bk, tgtobj, tgtnamex, verbose, level+1) + if tgtobj.macro or tgtobj.binary \ + or tgtobj.any_err: + if verbose: + tgtobj.dump( + bk.logfile, + header="!!! bad tgtobj !!!", + footer="------------------", + ) + res = Operand(oUNK, None) + any_err = any_err or tgtobj.macro or tgtobj.binary or tgtobj.any_err + any_rel = any_rel or tgtobj.any_rel + else: + assert len(tgtobj.stack) == 1 + res = copy.deepcopy(tgtobj.stack[0]) + res.rank = LEAF_RANK + if tgtobj.scope == -1: + res.text = tgtobj.name + else: + res.text = "%s!%s" \ + % (bk._sheet_names[tgtobj.scope], tgtobj.name) + if verbose: + print(" tNameX: setting text to", repr(res.text), file=bk.logfile) + spush(res) + elif opcode in _error_opcodes: + any_err = 1 + spush(error_opnd) + else: + if verbose: + print("FORMULA: /// Not handled yet: t" + oname, file=bk.logfile) + any_err = 1 + if sz <= 0: + raise FormulaError("Fatal: token size is not positive") + pos += sz + any_rel = not not any_rel + if verbose: + print("End of formula. 
level=%d any_rel=%d any_err=%d stack=%r" % \ + (level, not not any_rel, any_err, stack)) + if len(stack) >= 2: + print("*** Stack has unprocessed args") + print() + nobj.stack = stack + if len(stack) != 1: + nobj.result = None + else: + nobj.result = stack[0] + nobj.any_rel = any_rel + nobj.any_err = any_err + nobj.any_external = any_external + nobj.evaluated = 1 + +#### under construction #### +def decompile_formula(bk, fmla, fmlalen, + reldelta, browx=None, bcolx=None, + # browx & bcolx are required when reldelta == 0 + verbose=0, level=0): + if level > STACK_ALARM_LEVEL: + verbose = 1 + data = fmla + bv = bk.biff_version + if verbose: + print("::: decompile_formula len=%d reldelta=%d %r level=%d" \ + % (fmlalen, reldelta, data, level)) + hex_char_dump(data, 0, fmlalen) + if level > STACK_PANIC_LEVEL: + raise XLRDError("Excessive indirect references in formula") + sztab = szdict[bv] + pos = 0 + stack = [] + any_rel = 0 + any_err = 0 + any_external = 0 + unk_opnd = Operand(oUNK, None) + error_opnd = Operand(oERR, None) + spush = stack.append + + def do_binop(opcd, stk): + assert len(stk) >= 2 + bop = stk.pop() + aop = stk.pop() + argdict, result_kind, func, rank, sym = binop_rules[opcd] + otext = ''.join([ + '('[:aop.rank < rank], + aop.text, + ')'[:aop.rank < rank], + sym, + '('[:bop.rank < rank], + bop.text, + ')'[:bop.rank < rank], + ]) + resop = Operand(result_kind, None, rank, otext) + stk.append(resop) + + def do_unaryop(opcode, arglist, result_kind, stk): + assert len(stk) >= 1 + aop = stk.pop() + assert aop.kind in arglist + func, rank, sym1, sym2 = unop_rules[opcode] + otext = ''.join([ + sym1, + '('[:aop.rank < rank], + aop.text, + ')'[:aop.rank < rank], + sym2, + ]) + stk.append(Operand(result_kind, None, rank, otext)) + + def not_in_name_formula(op_arg, oname_arg): + msg = "ERROR *** Unexpected token 0x%02x (%s) found in formula" \ + % (op_arg, oname_arg) + # print msg + raise FormulaError(msg) + + if fmlalen == 0: + stack = [unk_opnd] + + while 0 <= 
pos < fmlalen: + op = data[pos] + opcode = op & 0x1f + optype = (op & 0x60) >> 5 + if optype: + opx = opcode + 32 + else: + opx = opcode + oname = onames[opx] # + [" RVA"][optype] + sz = sztab[opx] + if verbose: + print("Pos:%d Op:0x%02x opname:t%s Sz:%d opcode:%02xh optype:%02xh" \ + % (pos, op, oname, sz, opcode, optype)) + print("Stack =", stack) + if sz == -2: + msg = 'ERROR *** Unexpected token 0x%02x ("%s"); biff_version=%d' \ + % (op, oname, bv) + raise FormulaError(msg) + if not optype: + if 0x00 <= opcode <= 0x02: # unk_opnd, tExp, tTbl + not_in_name_formula(op, oname) + elif 0x03 <= opcode <= 0x0E: + # Add, Sub, Mul, Div, Power + # tConcat + # tLT, ..., tNE + do_binop(opcode, stack) + elif opcode == 0x0F: # tIsect + if verbose: print("tIsect pre", stack, file=bk.logfile) + assert len(stack) >= 2 + bop = stack.pop() + aop = stack.pop() + sym = ' ' + rank = 80 ########## check ####### + otext = ''.join([ + '('[:aop.rank < rank], + aop.text, + ')'[:aop.rank < rank], + sym, + '('[:bop.rank < rank], + bop.text, + ')'[:bop.rank < rank], + ]) + res = Operand(oREF) + res.text = otext + if bop.kind == oERR or aop.kind == oERR: + res.kind = oERR + elif bop.kind == oUNK or aop.kind == oUNK: + # This can happen with undefined + # (go search in the current sheet) labels. + # For example =Bob Sales + # Each label gets a NAME record with an empty formula (!) 
+ # Evaluation of the tName token classifies it as oUNK + # res.kind = oREF + pass + elif bop.kind == oREF == aop.kind: + pass + elif bop.kind == oREL == aop.kind: + res.kind = oREL + else: + pass + spush(res) + if verbose: print("tIsect post", stack, file=bk.logfile) + elif opcode == 0x10: # tList + if verbose: print("tList pre", stack, file=bk.logfile) + assert len(stack) >= 2 + bop = stack.pop() + aop = stack.pop() + sym = ',' + rank = 80 ########## check ####### + otext = ''.join([ + '('[:aop.rank < rank], + aop.text, + ')'[:aop.rank < rank], + sym, + '('[:bop.rank < rank], + bop.text, + ')'[:bop.rank < rank], + ]) + res = Operand(oREF, None, rank, otext) + if bop.kind == oERR or aop.kind == oERR: + res.kind = oERR + elif bop.kind in (oREF, oREL) and aop.kind in (oREF, oREL): + res.kind = oREF + if aop.kind == oREL or bop.kind == oREL: + res.kind = oREL + else: + pass + spush(res) + if verbose: print("tList post", stack, file=bk.logfile) + elif opcode == 0x11: # tRange + if verbose: print("tRange pre", stack, file=bk.logfile) + assert len(stack) >= 2 + bop = stack.pop() + aop = stack.pop() + sym = ':' + rank = 80 ########## check ####### + otext = ''.join([ + '('[:aop.rank < rank], + aop.text, + ')'[:aop.rank < rank], + sym, + '('[:bop.rank < rank], + bop.text, + ')'[:bop.rank < rank], + ]) + res = Operand(oREF, None, rank, otext) + if bop.kind == oERR or aop.kind == oERR: + res = oERR + elif bop.kind == oREF == aop.kind: + pass + else: + pass + spush(res) + if verbose: print("tRange post", stack, file=bk.logfile) + elif 0x12 <= opcode <= 0x14: # tUplus, tUminus, tPercent + do_unaryop(opcode, (oUNK, oNUM,), oNUM, stack) + elif opcode == 0x15: # tParen + # source cosmetics + pass + elif opcode == 0x16: # tMissArg + spush(Operand(oMSNG, None, LEAF_RANK, '')) + elif opcode == 0x17: # tStr + if bv <= 70: + strg, newpos = unpack_string_update_pos( + data, pos+1, bk.encoding, lenlen=1) + else: + strg, newpos = unpack_unicode_update_pos( + data, pos+1, lenlen=1) + sz 
= newpos - pos + if verbose: print(" sz=%d strg=%r" % (sz, strg), file=bk.logfile) + text = '"' + strg.replace('"', '""') + '"' + spush(Operand(oSTRG, None, LEAF_RANK, text)) + elif opcode == 0x18: # tExtended + # new with BIFF 8 + assert bv >= 80 + # not in OOo docs + raise FormulaError("tExtended token not implemented") + elif opcode == 0x19: # tAttr + subop, nc = unpack("= 1 + aop = stack[-1] + otext = 'SUM(%s)' % aop.text + stack[-1] = Operand(oNUM, None, FUNC_RANK, otext) + else: + sz = 4 + if verbose: + print(" subop=%02xh subname=t%s sz=%d nc=%02xh" \ + % (subop, subname, sz, nc)) + elif 0x1A <= opcode <= 0x1B: # tSheet, tEndSheet + assert bv < 50 + raise FormulaError("tSheet & tEndsheet tokens not implemented") + elif 0x1C <= opcode <= 0x1F: # tErr, tBool, tInt, tNum + inx = opcode - 0x1C + nb = [1, 1, 2, 8][inx] + kind = [oERR, oBOOL, oNUM, oNUM][inx] + value, = unpack("<" + "BBHd"[inx], data[pos+1:pos+1+nb]) + if inx == 2: # tInt + value = float(value) + text = str(value) + elif inx == 3: # tNum + text = str(value) + elif inx == 1: # tBool + text = ('FALSE', 'TRUE')[value] + else: + text = '"' +error_text_from_code[value] + '"' + spush(Operand(kind, None, LEAF_RANK, text)) + else: + raise FormulaError("Unhandled opcode: 0x%02x" % opcode) + if sz <= 0: + raise FormulaError("Size not set for opcode 0x%02x" % opcode) + pos += sz + continue + if opcode == 0x00: # tArray + spush(unk_opnd) + elif opcode == 0x01: # tFunc + nb = 1 + int(bv >= 40) + funcx = unpack("<" + " BH"[nb], data[pos+1:pos+1+nb])[0] + func_attrs = func_defs.get(funcx, None) + if not func_attrs: + print("*** formula/tFunc unknown FuncID:%d" % funcx, file=bk.logfile) + spush(unk_opnd) + else: + func_name, nargs = func_attrs[:2] + if verbose: + print(" FuncID=%d name=%s nargs=%d" \ + % (funcx, func_name, nargs)) + assert len(stack) >= nargs + argtext = listsep.join([arg.text for arg in stack[-nargs:]]) + otext = "%s(%s)" % (func_name, argtext) + del stack[-nargs:] + res = Operand(oUNK, None, 
FUNC_RANK, otext) + spush(res) + elif opcode == 0x02: #tFuncVar + nb = 1 + int(bv >= 40) + nargs, funcx = unpack("= nargs + assert len(stack) >= nargs + argtext = listsep.join([arg.text for arg in stack[-nargs:]]) + otext = "%s(%s)" % (func_name, argtext) + res = Operand(oUNK, None, FUNC_RANK, otext) + del stack[-nargs:] + spush(res) + elif opcode == 0x03: #tName + tgtnamex = unpack("> bk.logfile, " ", res + # spush(res) + elif opcode == 0x0D: #tAreaN + not_in_name_formula(op, oname) + # res = get_cell_range_addr(data, pos+1, bv, reldelta=1) + # # note *ALL* tAreaN usage has signed offset for relative addresses + # any_rel = 1 + # if verbose: print >> bk.logfile, " ", res + elif opcode == 0x1A: # tRef3d + if bv >= 80: + res = get_cell_addr(data, pos+3, bv, reldelta, browx, bcolx) + refx = unpack("= 80: + res1, res2 = get_cell_range_addr(data, pos+3, bv, reldelta) + refx = unpack("= 80: + refx, tgtnamex = unpack(" 0: + refx -= 1 + elif refx < 0: + refx = -refx - 1 + else: + dodgy = 1 + if verbose: + print(" origrefx=%d refx=%d tgtnamex=%d dodgy=%d" \ + % (origrefx, refx, tgtnamex, dodgy), file=bk.logfile) + # if tgtnamex == namex: + # if verbose: print >> bk.logfile, "!!!! Self-referential !!!!" 
+ # dodgy = any_err = 1 + if not dodgy: + if bv >= 80: + shx1, shx2 = get_externsheet_local_range(bk, refx, verbose) + elif origrefx > 0: + shx1, shx2 = (-4, -4) # external ref + else: + exty = bk._externsheet_type_b57[refx] + if exty == 4: # non-specific sheet in own doc't + shx1, shx2 = (-1, -1) # internal, any sheet + else: + shx1, shx2 = (-666, -666) + okind = oUNK + ovalue = None + if shx1 == -5: # addin func name + okind = oSTRG + ovalue = bk.addin_func_names[tgtnamex] + otext = '"' + ovalue.replace('"', '""') + '"' + elif dodgy or shx1 < -1: + otext = "<>" \ + % (tgtnamex, origrefx) + else: + tgtobj = bk.name_obj_list[tgtnamex] + if tgtobj.scope == -1: + otext = tgtobj.name + else: + otext = "%s!%s" \ + % (bk._sheet_names[tgtobj.scope], tgtobj.name) + if verbose: + print(" tNameX: setting text to", repr(res.text), file=bk.logfile) + res = Operand(okind, ovalue, LEAF_RANK, otext) + spush(res) + elif opcode in _error_opcodes: + any_err = 1 + spush(error_opnd) + else: + if verbose: + print("FORMULA: /// Not handled yet: t" + oname, file=bk.logfile) + any_err = 1 + if sz <= 0: + raise FormulaError("Fatal: token size is not positive") + pos += sz + any_rel = not not any_rel + if verbose: + print("End of formula. 
level=%d any_rel=%d any_err=%d stack=%r" % \ + (level, not not any_rel, any_err, stack)) + if len(stack) >= 2: + print("*** Stack has unprocessed args") + print() + + if len(stack) != 1: + result = None + else: + result = stack[0].text + return result + +#### under deconstruction ### +def dump_formula(bk, data, fmlalen, bv, reldelta, verbose=0, isname=0): + if verbose: + print("dump_formula", fmlalen, bv, len(data)) + hex_char_dump(data, 0, fmlalen) + assert bv >= 80 #### this function needs updating #### + sztab = szdict[bv] + pos = 0 + stack = [] + any_rel = 0 + any_err = 0 + spush = stack.append + while 0 <= pos < fmlalen: + op = data[pos] + opcode = op & 0x1f + optype = (op & 0x60) >> 5 + if optype: + opx = opcode + 32 + else: + opx = opcode + oname = onames[opx] # + [" RVA"][optype] + + sz = sztab[opx] + if verbose: + print("Pos:%d Op:0x%02x Name:t%s Sz:%d opcode:%02xh optype:%02xh" \ + % (pos, op, oname, sz, opcode, optype)) + if not optype: + if 0x01 <= opcode <= 0x02: # tExp, tTbl + # reference to a shared formula or table record + rowx, colx = unpack("= 2 + bop = stack.pop() + aop = stack.pop() + spush(aop + bop) + if verbose: print("tlist post", stack, file=bk.logfile) + elif opcode == 0x11: # tRange + if verbose: print("tRange pre", stack, file=bk.logfile) + assert len(stack) >= 2 + bop = stack.pop() + aop = stack.pop() + assert len(aop) == 1 + assert len(bop) == 1 + result = do_box_funcs(tRangeFuncs, aop[0], bop[0]) + spush(result) + if verbose: print("tRange post", stack, file=bk.logfile) + elif opcode == 0x0F: # tIsect + if verbose: print("tIsect pre", stack, file=bk.logfile) + assert len(stack) >= 2 + bop = stack.pop() + aop = stack.pop() + assert len(aop) == 1 + assert len(bop) == 1 + result = do_box_funcs(tIsectFuncs, aop[0], bop[0]) + spush(result) + if verbose: print("tIsect post", stack, file=bk.logfile) + elif opcode == 0x19: # tAttr + subop, nc = unpack("= 40) + funcx = unpack("<" + " BH"[nb], data[pos+1:pos+1+nb]) + if verbose: print(" 
FuncID=%d" % funcx, file=bk.logfile) + elif opcode == 0x02: #tFuncVar + nb = 1 + int(bv >= 40) + nargs, funcx = unpack("= 2: + print("*** Stack has unprocessed args", file=bk.logfile) + +# === Some helper functions for displaying cell references === + +# Note that a "non-standard" syntax is used in row and column +# components in relative references. +# For example, consider a relative reference: up two rows, right 3 columns. +# On screen, with cursor in cell D10, this would appear as G8. +# On screen, with cursor in cell Z100, this would appear as AC98. +# On screen, with cursor in cell A1, this would appear as D65535. +# These functions will display such a reference as [@+3,#-2]. +# "@" refers to the unknown base column. +# "#" refers to the unknown base row. +# +# I'm aware of only one possibility of a sheet-relative component in +# a reference: a 2D reference located in the "current sheet". +# xlrd stores this internally with bounds of (0, 1, ...) and +# relative flags of (1, 1, ...). These functions display the +# sheet component as empty, just like Excel etc. 
+ +def rownamerel(rowx, rowxrel): + if not rowxrel: + return "$%d" % rowx + if rowx > 0: + return "#+%d" % rowx + if rowx < 0: + return "#-%d" % (-rowx) + return "#" + +def colnamerel(colx, colxrel): + if not colxrel: + return "$" + colname(colx) + if colx > 0: + return "@+%d" % colx + if colx < 0: + return "@-%d" % (-colx) + return "@" +## +# Utility function: (5, 7) => 'H6' +def cellname(rowx, colx): + """ (5, 7) => 'H6' """ + return "%s%d" % (colname(colx), rowx+1) + +## +# Utility function: (5, 7) => '$H$6' +def cellnameabs(rowx, colx): + """ (5, 7) => '$H$6' """ + return "$%s$%d" % (colname(colx), rowx+1) + +def cellnamerel(rowx, colx, rowxrel, colxrel): + if not rowxrel and not colxrel: + return cellnameabs(rowx, colx) + return "[%s,%s]" % ( + colnamerel(colx, colxrel), + rownamerel(rowx, rowxrel)) +## +# Utility function: 7 => 'H', 27 => 'AB' +def colname(colx): + """ 7 => 'H', 27 => 'AB' """ + alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + if colx <= 25: + return alphabet[colx] + else: + xdiv26, xmod26 = divmod(colx, 26) + return alphabet[xdiv26 - 1] + alphabet[xmod26] + +def rangename2d(rlo, rhi, clo, chi): + """ (5, 20, 7, 10) => '$H$6:$J$20' """ + if rhi == rlo+1 and chi == clo+1: + return cellnameabs(rlo, clo) + return "%s:%s" % (cellnameabs(rlo, clo), cellnameabs(rhi-1, chi-1)) + +def rangename2drel(xxx_todo_changeme, xxx_todo_changeme1): + (rlo, rhi, clo, chi) = xxx_todo_changeme + (rlorel, rhirel, clorel, chirel) = xxx_todo_changeme1 + return "%s:%s" % ( + cellnamerel(rlo, clo, rlorel, clorel), + cellnamerel(rhi-1, chi-1, rhirel, chirel) + ) +## +# Utility function: +#
Ref3D((1, 4, 5, 20, 7, 10)) => 'Sheet2:Sheet3!$H$6:$J$20' +def rangename3d(book, ref3d): + """ Ref3D(1, 4, 5, 20, 7, 10) => 'Sheet2:Sheet3!$H$6:$J$20' + (assuming Excel's default sheetnames) """ + coords = ref3d.coords + return "%s!%s" % ( + sheetrange(book, *coords[:2]), + rangename2d(*coords[2:6])) + +## +# Utility function: +#
Ref3D(coords=(0, 1, -32, -22, -13, 13), relflags=(0, 0, 1, 1, 1, 1)) +# => 'Sheet1![@-13,#-32]:[@+12,#-23]' +# where '@' refers to the current or base column and '#' +# refers to the current or base row. +def rangename3drel(book, ref3d): + coords = ref3d.coords + relflags = ref3d.relflags + shdesc = sheetrangerel(book, coords[:2], relflags[:2]) + rngdesc = rangename2drel(coords[2:6], relflags[2:6]) + if not shdesc: + return rngdesc + return "%s!%s" % (shdesc, rngdesc) + +def quotedsheetname(shnames, shx): + if shx >= 0: + shname = shnames[shx] + else: + shname = { + -1: "?internal; any sheet?", + -2: "internal; deleted sheet", + -3: "internal; macro sheet", + -4: "<>", + }.get(shx, "?error %d?" % shx) + if "'" in shname: + return "'" + shname.replace("'", "''") + "'" + if " " in shname: + return "'" + shname + "'" + return shname + +def sheetrange(book, slo, shi): + shnames = book.sheet_names() + shdesc = quotedsheetname(shnames, slo) + if slo != shi-1: + shdesc += ":" + quotedsheetname(shnames, shi-1) + return shdesc + +def sheetrangerel(book, xxx_todo_changeme2, xxx_todo_changeme3): + (slo, shi) = xxx_todo_changeme2 + (slorel, shirel) = xxx_todo_changeme3 + if not slorel and not shirel: + return sheetrange(book, slo, shi) + assert (slo == 0 == shi-1) and slorel and shirel + return "" + +# ============================================================== diff --git a/tablib/packages/xlrd3/sheet.py b/tablib/packages/xlrd3/sheet.py new file mode 100644 index 0000000..5911fb0 --- /dev/null +++ b/tablib/packages/xlrd3/sheet.py @@ -0,0 +1,1611 @@ +# Portions copyright © 2005-2009 Stephen John Machin, Lingfo Pty Ltd +# This module is part of the xlrd3 package, which is released under a +# BSD-style licence. 
+ +# 2009-05-31 SJM Fixed problem with no CODEPAGE record on extremely minimal BIFF2.x 3rd-party file +# 2009-04-27 SJM Integrated on_demand patch by Armando Serrano Lombillo +# 2008-02-09 SJM Excel 2.0: build XFs on the fly from cell attributes +# 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files. +# 2007-10-11 SJM Added missing entry for blank cell type to ctype_text +# 2007-07-11 SJM Allow for BIFF2/3-style FORMAT record in BIFF4/8 file +# 2007-04-22 SJM Remove experimental "trimming" facility. + +#for debugging only +from math import isnan + +import time +from struct import unpack +from array import array + +from .biffh import * +from .formula import dump_formula, decompile_formula, rangename2d +from .formatting import nearest_colour_index, Format +from .xfcell import XFCell + +DEBUG = 0 +OBJ_MSO_DEBUG = 0 + +_WINDOW2_options = ( + # Attribute names and initial values to use in case + # a WINDOW2 record is not written. + ("show_formulas", 0), + ("show_grid_lines", 1), + ("show_sheet_headers", 1), + ("panes_are_frozen", 0), + ("show_zero_values", 1), + ("automatic_grid_line_colour", 1), + ("columns_from_right_to_left", 0), + ("show_outline_symbols", 1), + ("remove_splits_if_pane_freeze_is_removed", 0), + ("sheet_selected", 0), + # "sheet_visible" appears to be merely a clone of "sheet_selected". + # The real thing is the visibility attribute from the BOUNDSHEET record. + ("sheet_visible", 0), + ("show_in_page_break_preview", 0), + ) + +def int_floor_div(x, y): + return divmod(x, y)[0] + +class Sheet(BaseObject): + """Contains the data for one worksheet. + + In the cell access functions, "rowx" is a row index, counting from zero, + and "colx" is a column index, counting from zero. + Negative values for row/column indexes and slice positions are supported in + the expected fashion. + + For information about cell types and cell values, refer to the documentation + of the Cell class. + + WARNING: You don't call this class yourself. 
You access Sheet objects via + the Book object that was returned when you called xlrd.open_workbook("myfile.xls"). + """ + + # Name of sheet. + name = '' + + # Number of rows in sheet. A row index is in range(thesheet.nrows). + nrows = 0 + + # Number of columns in sheet. A column index is in range(thesheet.ncols). + ncols = 0 + + # The map from a column index to a Colinfo object. Often there is an entry + # in COLINFO records for all column indexes in range(257). + # Note that xlrd ignores the entry for the non-existent + # 257th column. On the other hand, there may be no entry for unused columns. + # - New in version 0.6.1 + colinfo_map = {} + + # The map from a row index to a Rowinfo object. Note that it is possible + # to have missing entries -- at least one source of XLS files doesn't + # bother writing ROW records. + # - New in version 0.6.1 + rowinfo_map = {} + + # List of address ranges of cells containing column labels. + # These are set up in Excel by Insert > Name > Labels > Columns. + # - New in version 0.6.0 + # How to deconstruct the list:: + # + # for crange in thesheet.col_label_ranges: + # rlo, rhi, clo, chi = crange + # for rx in xrange(rlo, rhi): + # for cx in xrange(clo, chi): + # print "Column label at (rowx=%d, colx=%d) is %r" \ + # (rx, cx, thesheet.cell_value(rx, cx)) + # + col_label_ranges = [] + + # List of address ranges of cells containing row labels. + # For more details, see col_label_ranges above. + # - New in version 0.6.0 + row_label_ranges = [] + + # List of address ranges of cells which have been merged. + # These are set up in Excel by Format > Cells > Alignment, then ticking + # the "Merge cells" box. + # - New in version 0.6.1. 
Extracted only if open_workbook(..., formatting_info=True) + # How to deconstruct the list:: + # + # for crange in thesheet.merged_cells: + # rlo, rhi, clo, chi = crange + # for rowx in xrange(rlo, rhi): + # for colx in xrange(clo, chi): + # # cell (rlo, clo) (the top left one) will carry the data + # # and formatting info; the remainder will be recorded as + # # blank cells, but a renderer will apply the formatting info + # # for the top left cell (e.g. border, pattern) to all cells in + # # the range. + # + merged_cells = [] + + # Default column width from DEFCOLWIDTH record, else None. + # From the OOo docs: + # """Column width in characters, using the width of the zero character + # from default font (first FONT record in the file). Excel adds some + # extra space to the default width, depending on the default font and + # default font size. The algorithm how to exactly calculate the resulting + # column width is not known. + # Example: The default width of 8 set in this record results in a column + # width of 8.43 using Arial font with a size of 10 points.""" + # For the default hierarchy, refer to the Colinfo class above. + # - New in version 0.6.1 + defcolwidth = None + + # Default column width from STANDARDWIDTH record, else None. + # From the OOo docs: + # """Default width of the columns in 1/256 of the width of the zero + # character, using default font (first FONT record in the file).""" + # For the default hierarchy, refer to the Colinfo class above. + # - New in version 0.6.1 + standardwidth = None + + # Default value to be used for a row if there is + # no ROW record for that row. + # From the optional DEFAULTROWHEIGHT record. + default_row_height = None + + # Default value to be used for a row if there is + # no ROW record for that row. + # From the ´optional´ DEFAULTROWHEIGHT record. + default_row_height_mismatch = None + + # Default value to be used for a row if there is + # no ROW record for that row. 
+ # From the ´optional´ DEFAULTROWHEIGHT record. + default_row_hidden = None + + # Default value to be used for a row if there is + # no ROW record for that row. + # From the optional DEFAULTROWHEIGHT record. + default_additional_space_above = None + + # Default value to be used for a row if there is + # no ROW record for that row. + # From the optional DEFAULTROWHEIGHT record. + default_additional_space_below = None + + # Visibility of the sheet. 0 = visible, 1 = hidden (can be unhidden + # by user -- Format/Sheet/Unhide), 2 = "very hidden" (can be unhidden + # only by VBA macro). + visibility = 0 + + # A 256-element tuple corresponding to the contents of the GCW record for this sheet. + # If no such record, treat as all bits zero. + # Applies to BIFF4-7 only. See docs of Colinfo class for discussion. + gcw = (0, ) * 256 + + def __init__(self, book, position, name, number): + self.book = book + self.biff_version = book.biff_version + self._position = position + self.logfile = book.logfile + self.pickleable = book.pickleable + # (to_py3) self.dont_use_array = not(array_array and (CAN_PICKLE_ARRAY or not book.pickleable)) + self.name = name + self.number = number + self.verbosity = book.verbosity + self.formatting_info = book.formatting_info + self._xf_index_to_xl_type_map = book._xf_index_to_xl_type_map + self.nrows = 0 # actual, including possibly empty cells + self.ncols = 0 + self._maxdatarowx = -1 # highest rowx containing a non-empty cell + self._maxdatacolx = -1 # highest colx containing a non-empty cell + self._dimnrows = 0 # as per DIMENSIONS record + self._dimncols = 0 + self._cell_values = [] + self._cell_types = [] + self._cell_xf_indexes = [] + self._need_fix_ragged_rows = 0 + self.defcolwidth = None + self.standardwidth = None + self.default_row_height = None + self.default_row_height_mismatch = 0 + self.default_row_hidden = 0 + self.default_additional_space_above = 0 + self.default_additional_space_below = 0 + self.colinfo_map = {} + self.rowinfo_map 
= {} + self.col_label_ranges = [] + self.row_label_ranges = [] + self.merged_cells = [] + self._xf_index_stats = [0, 0, 0, 0] + self.visibility = book._sheet_visibility[number] # from BOUNDSHEET record + for attr, defval in _WINDOW2_options: + setattr(self, attr, defval) + self.first_visible_rowx = 0 + self.first_visible_colx = 0 + self.gridline_colour_index = 0x40 + self.gridline_colour_rgb = None # pre-BIFF8 + self.cached_page_break_preview_mag_factor = 0 + self.cached_normal_view_mag_factor = 0 + self._ixfe = None # BIFF2 only + self._cell_attr_to_xfx = {} # BIFF2.0 only + + #### Don't initialise this here, use class attribute initialisation. + #### self.gcw = (0, ) * 256 #### + + if self.biff_version >= 80: + self.utter_max_rows = 65536 + else: + self.utter_max_rows = 16384 + self.utter_max_cols = 256 + + def cell(self, rowx, colx): + """ Get the XFCell() object in the given row and column. """ + if self.formatting_info: + xf_index = self.cell_xf_index(rowx, colx) + else: + xf_index = None + ctype = self.cell_type(rowx, colx) + value = self.cell_value(rowx, colx) + return Cell(ctype, value, xf_index, self) + + def cell_value(self, rowx, colx): + """ Value of the cell in the given row and column. """ + return self._cell_values[rowx][colx] + + def cell_type(self, rowx, colx): + """ Type of the cell in the given row and column. + Refer to the documentation of the Cell class. + """ + return self._cell_types[rowx][colx] + + # New in version 0.6.1 + def cell_xf_index(self, rowx, colx): + """ XF index of the cell in the given row and column. + This is an index into Book.xf_list. 
+ """ + self.req_fmt_info() + xfx = self._cell_xf_indexes[rowx][colx] + if xfx > -1: + self._xf_index_stats[0] += 1 + return xfx + # Check for a row xf_index + try: + xfx = self.rowinfo_map[rowx].xf_index + if xfx > -1: + self._xf_index_stats[1] += 1 + return xfx + except KeyError: + pass + # Check for a column xf_index + try: + xfx = self.colinfo_map[colx].xf_index + assert xfx > -1 + self._xf_index_stats[2] += 1 + return xfx + except KeyError: + # If all else fails, 15 is used as hardwired global default xf_index. + self._xf_index_stats[3] += 1 + return 15 + + + def row(self, rowx): + """ Returns a sequence of the Cell objects in the given row. """ + return [self.cell(rowx, colx) for colx in range(self.ncols)] + + def row_types(self, rowx, start_colx=0, end_colx=None): + """ Returns a slice of the types of the cells in the given row. """ + if end_colx is None: + return self._cell_types[rowx][start_colx:] + return self._cell_types[rowx][start_colx:end_colx] + + def row_values(self, rowx, start_colx=0, end_colx=None): + """ Returns a slice of the values of the cells in the given row. """ + if end_colx is None: + return self._cell_values[rowx][start_colx:] + return self._cell_values[rowx][start_colx:end_colx] + + def row_slice(self, rowx, start_colx=0, end_colx=None): + """ Returns a slice of the Cell objects in the given row. """ + nc = self.ncols + if start_colx < 0: + start_colx += nc + if start_colx < 0: + start_colx = 0 + if end_colx is None or end_colx > nc: + end_colx = nc + elif end_colx < 0: + end_colx += nc + return [self.cell(rowx, colx) for colx in range(start_colx, end_colx)] + + def col_slice(self, colx, start_rowx=0, end_rowx=None): + """ Returns a slice of the Cell objects in the given column. 
""" + nr = self.nrows + if start_rowx < 0: + start_rowx += nr + if start_rowx < 0: + start_rowx = 0 + if end_rowx is None or end_rowx > nr: + end_rowx = nr + elif end_rowx < 0: + end_rowx += nr + return [self.cell(rowx, colx) for rowx in range(start_rowx, end_rowx)] + + col = col_slice + """ Returns a sequence of the Cell objects in the given column. """ + + def col_values(self, colx, start_rowx=0, end_rowx=None): + """ Returns a slice of the values of the cells in the given column. """ + nr = self.nrows + if start_rowx < 0: + start_rowx += nr + if start_rowx < 0: + start_rowx = 0 + if end_rowx is None or end_rowx > nr: + end_rowx = nr + elif end_rowx < 0: + end_rowx += nr + return [self._cell_values[rowx][colx] for rowx in range(start_rowx, end_rowx)] + + def col_types(self, colx, start_rowx=0, end_rowx=None): + """ Returns a slice of the types of the cells in the given column. """ + nr = self.nrows + if start_rowx < 0: + start_rowx += nr + if start_rowx < 0: + start_rowx = 0 + if end_rowx is None or end_rowx > nr: + end_rowx = nr + elif end_rowx < 0: + end_rowx += nr + return [self._cell_types[rowx][colx] for rowx in range(start_rowx, end_rowx)] + + # Following methods are used in building the worksheet. + # They are not part of the API. + + def extend_cells(self, nr, nc): + assert 1 <= nc <= self.utter_max_cols + assert 1 <= nr <= self.utter_max_rows + if nr <= self.nrows: + # New cell is in an existing row, so extend that row (if necessary). + # Note that nr < self.nrows means that the cell data + # is not in ascending row order!! + self._need_fix_ragged_rows = 1 + nrx = nr - 1 + trow = self._cell_types[nrx] + tlen = len(trow) + nextra = max(nc, self.ncols) - tlen + if nextra > 0: + xce = XL_CELL_EMPTY + #(to_py3) if self.dont_use_array: ... 
removed + trow.extend(array('B', [xce]) * nextra) + if self.formatting_info: + self._cell_xf_indexes[nrx].extend(array('h', [-1]) * nextra) + self._cell_values[nrx].extend([''] * nextra) + if nc > self.ncols: + self.ncols = nc + self._need_fix_ragged_rows = 1 + if nr > self.nrows: + scta = self._cell_types.append + scva = self._cell_values.append + scxa = self._cell_xf_indexes.append + fmt_info = self.formatting_info + xce = XL_CELL_EMPTY + nc = self.ncols + + #(to_py3) if self.dont_use_array: ... removed + for _unused in range(self.nrows, nr): + scta(array('B', [xce]) * nc) + scva([''] * nc) + if fmt_info: + scxa(array('h', [-1]) * nc) + self.nrows = nr + + def fix_ragged_rows(self): + t0 = time.time() + ncols = self.ncols + xce = XL_CELL_EMPTY + s_cell_types = self._cell_types + s_cell_values = self._cell_values + s_cell_xf_indexes = self._cell_xf_indexes + s_fmt_info = self.formatting_info + totrowlen = 0 + for rowx in range(self.nrows): + trow = s_cell_types[rowx] + rlen = len(trow) + totrowlen += rlen + nextra = ncols - rlen + if nextra > 0: + s_cell_values[rowx][rlen:] = [''] * nextra + trow.extend(array('B', [xce]) * nextra) + if s_fmt_info: + s_cell_xf_indexes[rowx][rlen:] = array('h', [-1]) * nextra + self._fix_ragged_rows_time = time.time() - t0 + + def tidy_dimensions(self): + if self.verbosity >= 3: + fprintf(self.logfile, + "tidy_dimensions: nrows=%d ncols=%d _need_fix_ragged_rows=%d\n", + self.nrows, self.ncols, self._need_fix_ragged_rows) + if self.merged_cells: + nr = nc = 0 + umaxrows = self.utter_max_rows + umaxcols = self.utter_max_cols + for crange in self.merged_cells: + rlo, rhi, clo, chi = crange + if not (0 <= rlo < rhi <= umaxrows) \ + or not (0 <= clo < chi <= umaxcols): + fprintf(self.logfile, + "*** WARNING: sheet #%d (%r), MERGEDCELLS bad range %r\n", + self.number, self.name, crange) + if rhi > nr: nr = rhi + if chi > nc: nc = chi + self.extend_cells(nr, nc) + if self.verbosity >= 1 and \ + (self.nrows != self._dimnrows or self.ncols 
!= self._dimncols): + fprintf(self.logfile, + "NOTE *** sheet %d (%r): DIMENSIONS R,C = %d,%d should be %d,%d\n", + self.number, + self.name, + self._dimnrows, + self._dimncols, + self.nrows, + self.ncols, + ) + if self._need_fix_ragged_rows: + self.fix_ragged_rows() + + def put_cell(self, rowx, colx, ctype, value, xf_index): + try: + self._cell_types[rowx][colx] = ctype + self._cell_values[rowx][colx] = value + if self.formatting_info: + self._cell_xf_indexes[rowx][colx] = xf_index + except IndexError: + self.extend_cells(rowx+1, colx+1) + try: + self._cell_types[rowx][colx] = ctype + self._cell_values[rowx][colx] = value + if self.formatting_info: + self._cell_xf_indexes[rowx][colx] = xf_index + except: + print("put_cell", rowx, colx, file=self.logfile) + raise + except: + print("put_cell", rowx, colx, file=self.logfile) + raise + + def put_blank_cell(self, rowx, colx, xf_index): + # This is used for cells from BLANK and MULBLANK records + ctype = XL_CELL_BLANK + value = '' + try: + self._cell_types[rowx][colx] = ctype + self._cell_values[rowx][colx] = value + self._cell_xf_indexes[rowx][colx] = xf_index + except IndexError: + self.extend_cells(rowx+1, colx+1) + try: + self._cell_types[rowx][colx] = ctype + self._cell_values[rowx][colx] = value + self._cell_xf_indexes[rowx][colx] = xf_index + except: + print("put_cell", rowx, colx, file=self.logfile) + raise + except: + print("put_cell", rowx, colx, file=self.logfile) + raise + + def put_number_cell(self, rowx, colx, value, xf_index): + # for debugging + if type(value) == float and isnan(value): + pass + ctype = self._xf_index_to_xl_type_map[xf_index] + try: + self._cell_types[rowx][colx] = ctype + self._cell_values[rowx][colx] = value + if self.formatting_info: + self._cell_xf_indexes[rowx][colx] = xf_index + except IndexError: + self.extend_cells(rowx+1, colx+1) + try: + self._cell_types[rowx][colx] = ctype + self._cell_values[rowx][colx] = value + if self.formatting_info: + self._cell_xf_indexes[rowx][colx] = 
xf_index + except: + print("put_number_cell", rowx, colx, file=self.logfile) + raise + except: + print("put_number_cell", rowx, colx, file=self.logfile) + raise + + # === Methods after this line neither know nor care about how cells are stored. + + def read(self, bk): + global rc_stats + DEBUG = 0 + verbose = DEBUG or self.verbosity >= 2 + verbose_rows = DEBUG or self.verbosity >= 4 + verbose_formulas = 1 and verbose + oldpos = bk._position + bk._position = self._position + XL_SHRFMLA_ETC_ETC = ( + XL_SHRFMLA, XL_ARRAY, XL_TABLEOP, XL_TABLEOP2, + XL_ARRAY2, XL_TABLEOP_B2, + ) + self_put_number_cell = self.put_number_cell + self_put_cell = self.put_cell + self_put_blank_cell = self.put_blank_cell + local_unpack = unpack + bk_get_record_parts = bk.get_record_parts + bv = self.biff_version + fmt_info = self.formatting_info + eof_found = 0 + while 1: + rc, data_len, data = bk_get_record_parts() + if rc == XL_NUMBER: + rowx, colx, xf_index, d = local_unpack('> 15) & 1 + r.outline_level = bits2 & 7 + r.outline_group_starts_ends = (bits2 >> 4) & 1 + r.hidden = (bits2 >> 5) & 1 + r.height_mismatch = (bits2 >> 6) & 1 + r.has_default_xf_index = (bits2 >> 7) & 1 + r.xf_index = (bits2 >> 16) & 0xfff + r.additional_space_above = (bits2 >> 28) & 1 + r.additional_space_below = (bits2 >> 29) & 1 + if not r.has_default_xf_index: + r.xf_index = -1 + self.rowinfo_map[rowx] = r + if 0 and r.xf_index > -1: + fprintf(self.logfile, + "**ROW %d %d %d\n", + self.number, rowx, r.xf_index) + if verbose_rows: + print('ROW', rowx, bits1, bits2, file=self.logfile) + r.dump(self.logfile, + header="--- sh #%d, rowx=%d ---" % (self.number, rowx)) + elif rc in XL_FORMULA_OPCODES: # 06, 0206, 0406 + if bv >= 50: + # IMPORTANT result_str is bytes + rowx, colx, xf_index, result_str, flags = local_unpack('= 30: + rowx, colx, xf_index, result_str, flags = local_unpack(' 20)) + else: + strg = unpack_unicode(data2, 0, lenlen=2) + self.put_cell(rowx, colx, XL_CELL_TEXT, strg, xf_index) + # if DEBUG: print 
"FORMULA strg %r" % strg + elif result_str[0] == 1: #b'\x01': + # boolean formula result + value = result_str[2] + self.put_cell(rowx, colx, XL_CELL_BOOLEAN, value, xf_index) + elif result_str[0] == 2: #b'\x02': + # Error in cell + value = result_str[2] + self.put_cell(rowx, colx, XL_CELL_ERROR, value, xf_index) + elif result_str[0] == 3:#b'\x03': + # empty ... i.e. empty (zero-length) string, NOT an empty cell. + self.put_cell(rowx, colx, XL_CELL_TEXT, "", xf_index) + else: + raise XLRDError("unexpected special case (0x%02x) in FORMULA" % result_str[0]) + else: + # it is a number + d = local_unpack(' 255: break # Excel does 0 to 256 inclusive + self.colinfo_map[colx] = c + if 0: + fprintf(self.logfile, + "**COL %d %d %d\n", + self.number, colx, c.xf_index) + if verbose: + fprintf( + self.logfile, + "COLINFO sheet #%d cols %d-%d: wid=%d xf_index=%d flags=0x%04x\n", + self.number, first_colx, last_colx, c.width, c.xf_index, flags, + ) + c.dump(self.logfile, header='===') + elif rc == XL_DEFCOLWIDTH: + self.defcolwidth, = local_unpack(">= 1 + self.gcw = tuple(gcw) + if 0: + showgcw = "".join(["F "[x] for x in gcw]).rstrip().replace(' ', '.') + print("GCW:", showgcw) + elif rc == XL_BLANK: + if not fmt_info: continue + rowx, colx, xf_index = local_unpack(' found EOF", file=self.logfile) + elif rc == XL_COUNTRY: + bk.handle_country(data) + elif rc == XL_LABELRANGES: + pos = 0 + pos = unpack_cell_range_address_list_update_pos( + self.row_label_ranges, data, pos, bv, addr_size=8, + ) + pos = unpack_cell_range_address_list_update_pos( + self.col_label_ranges, data, pos, bv, addr_size=8, + ) + assert pos == data_len + elif rc == XL_ARRAY: + row1x, rownx, col1x, colnx, array_flags, tokslen = \ + local_unpack("= 80 + num_CFs, needs_recalc, browx1, browx2, bcolx1, bcolx2 = \ + unpack("<6H", data[0:12]) + if self.verbosity >= 1: + fprintf(self.logfile, + "\n*** WARNING: Ignoring CONDFMT (conditional formatting) record\n" \ + "*** in Sheet %d (%r).\n" \ + "*** %d CF record(s); 
needs_recalc_or_redraw = %d\n" \ + "*** Bounding box is %s\n", + self.number, self.name, num_CFs, needs_recalc, + rangename2d(browx1, browx2+1, bcolx1, bcolx2+1), + ) + olist = [] # updated by the function + pos = unpack_cell_range_address_list_update_pos( + olist, data, 12, bv, addr_size=8) + # print >> self.logfile, repr(result), len(result) + if self.verbosity >= 1: + fprintf(self.logfile, + "*** %d individual range(s):\n" \ + "*** %s\n", + len(olist), + ", ".join([rangename2d(*coords) for coords in olist]), + ) + elif rc == XL_CF: + if not fmt_info: continue + cf_type, cmp_op, sz1, sz2, flags = unpack("> 26) & 1 + bord_block = (flags >> 28) & 1 + patt_block = (flags >> 29) & 1 + if self.verbosity >= 1: + fprintf(self.logfile, + "\n*** WARNING: Ignoring CF (conditional formatting) sub-record.\n" \ + "*** cf_type=%d, cmp_op=%d, sz1=%d, sz2=%d, flags=0x%08x\n" \ + "*** optional data blocks: font=%d, border=%d, pattern=%d\n", + cf_type, cmp_op, sz1, sz2, flags, + font_block, bord_block, patt_block, + ) + # hex_char_dump(data, 0, data_len) + pos = 12 + if font_block: + (font_height, font_options, weight, escapement, underline, + font_colour_index, two_bits, font_esc, font_underl) = \ + unpack("<64x i i H H B 3x i 4x i i i 18x", data[pos:pos+118]) + font_style = (two_bits > 1) & 1 + posture = (font_options > 1) & 1 + font_canc = (two_bits > 7) & 1 + cancellation = (font_options > 7) & 1 + if self.verbosity >= 1: + fprintf(self.logfile, + "*** Font info: height=%d, weight=%d, escapement=%d,\n" \ + "*** underline=%d, colour_index=%d, esc=%d, underl=%d,\n" \ + "*** style=%d, posture=%d, canc=%d, cancellation=%d\n", + font_height, weight, escapement, underline, + font_colour_index, font_esc, font_underl, + font_style, posture, font_canc, cancellation, + ) + pos += 118 + if bord_block: + pos += 8 + if patt_block: + pos += 4 + fmla1 = data[pos:pos+sz1] + pos += sz1 + if verbose and sz1: + fprintf(self.logfile, + "*** formula 1:\n", + ) + dump_formula(bk, fmla1, sz1, bv, 
reldelta=0, verbose=1) + fmla2 = data[pos:pos+sz2] + pos += sz2 + assert pos == data_len + if verbose and sz2: + fprintf(self.logfile, + "*** formula 2:\n", + ) + dump_formula(bk, fmla2, sz2, bv, reldelta=0, verbose=1) + elif rc == XL_DEFAULTROWHEIGHT: + if data_len == 4: + bits, self.default_row_height = unpack("> 1) & 1 + self.default_additional_space_above = (bits >> 2) & 1 + self.default_additional_space_below = (bits >> 3) & 1 + elif rc == XL_MERGEDCELLS: + if not fmt_info: continue + pos = unpack_cell_range_address_list_update_pos( + self.merged_cells, data, 0, bv, addr_size=8) + if verbose: + fprintf(self.logfile, + "MERGEDCELLS: %d ranges\n", int_floor_div(pos - 2, 8)) + assert pos == data_len, \ + "MERGEDCELLS: pos=%d data_len=%d" % (pos, data_len) + elif rc == XL_WINDOW2: + if bv >= 80: + (options, + self.first_visible_rowx, self.first_visible_colx, + self.gridline_colour_index, + self.cached_page_break_preview_mag_factor, + self.cached_normal_view_mag_factor + ) = unpack(">= 1 + # print "WINDOW2: visible=%d selected=%d" \ + # % (self.sheet_visible, self.sheet_selected) + #### all of the following are for BIFF <= 4W + elif bv <= 45: + if rc == XL_FORMAT or rc == XL_FORMAT2: + bk.handle_format(data, rc) + elif rc == XL_FONT or rc == XL_FONT_B3B4: + bk.handle_font(data) + elif rc == XL_STYLE: + if not self.book._xf_epilogue_done: + self.book.xf_epilogue() + bk.handle_style(data) + elif rc == XL_PALETTE: + bk.handle_palette(data) + elif rc == XL_BUILTINFMTCOUNT: + bk.handle_builtinfmtcount(data) + elif rc == XL_XF4 or rc == XL_XF3 or rc == XL_XF2: #### N.B. 
not XL_XF + bk.handle_xf(data) + elif rc == XL_DATEMODE: + bk.handle_datemode(data) + elif rc == XL_CODEPAGE: + bk.handle_codepage(data) + elif rc == XL_FILEPASS: + bk.handle_filepass(data) + elif rc == XL_WRITEACCESS: + bk.handle_writeaccess(data) + elif rc == XL_IXFE: + self._ixfe = local_unpack('> 15) & 1 + r.outline_level = 0 + r.outline_group_starts_ends = 0 + r.hidden = 0 + r.height_mismatch = 0 + r.has_default_xf_index = has_defaults & 1 + r.additional_space_above = 0 + r.additional_space_below = 0 + if not r.has_default_xf_index: + r.xf_index = -1 + elif data_len == 18: + # Seems the XF index in the cell_attr is dodgy + xfx = local_unpack(' -1: + fprintf(self.logfile, + "**ROW %d %d %d\n", + self.number, rowx, r.xf_index) + if verbose_rows: + print('ROW_B2', rowx, bits1, has_defaults, file=self.logfile) + r.dump(self.logfile, + header="--- sh #%d, rowx=%d ---" % (self.number, rowx)) + elif rc == XL_COLWIDTH: # BIFF2 only + if not fmt_info: continue + first_colx, last_colx, width\ + = local_unpack("= 2 + if self.biff_version == 21: + if self._xf_index_to_xl_type_map: + if true_xfx is not None: + xfx = true_xfx + else: + xfx = cell_attr[0] & 0x3F + if xfx == 0x3F: + if self._ixfe is None: + raise XLRDError("BIFF2 cell record has XF index 63 but no preceding IXFE record.") + xfx = self._ixfe + # OOo docs are capable of interpretation that each + # cell record is preceded immediately by its own IXFE record. + # Empirical evidence is that (sensibly) an IXFE record applies to all + # following cell records until another IXFE comes along. + return xfx + # Have either Excel 2.0, or broken 2.1 w/o XF records -- same effect. 
+ self.biff_version = self.book.biff_version = 20 + #### check that XF slot in cell_attr is zero + xfx_slot = cell_attr[0] & 0x3F + assert xfx_slot == 0 + xfx = self._cell_attr_to_xfx.get(cell_attr) + if xfx is not None: + return xfx + if verbose: + fprintf(self.logfile, "New cell_attr %r at (%r, %r)\n", cell_attr, rowx, colx) + book = self.book + xf = self.fake_XF_from_BIFF20_cell_attr(cell_attr) + xfx = len(book.xf_list) + xf.xf_index = xfx + book.xf_list.append(xf) + if verbose: + xf.dump(self.logfile, header="=== Faked XF %d ===" % xfx, footer="======") + if xf.format_key not in book.format_map: + msg = "ERROR *** XF[%d] unknown format key (%d, 0x%04x)\n" + fprintf(self.logfile, msg, + xf.xf_index, xf.format_key, xf.format_key) + fmt = Format(xf.format_key, FUN, "General") + book.format_map[xf.format_key] = fmt + while len(book.format_list) <= xf.format_key: + book.format_list.append(fmt) + cellty_from_fmtty = { + FNU: XL_CELL_NUMBER, + FUN: XL_CELL_NUMBER, + FGE: XL_CELL_NUMBER, + FDT: XL_CELL_DATE, + FTX: XL_CELL_NUMBER, # Yes, a number can be formatted as text. 
+ } + fmt = book.format_map[xf.format_key] + cellty = cellty_from_fmtty[fmt.type] + self._xf_index_to_xl_type_map[xf.xf_index] = cellty + self._cell_attr_to_xfx[cell_attr] = xfx + return xfx + + def fake_XF_from_BIFF20_cell_attr(self, cell_attr): + from .formatting import XF, XFAlignment, XFBorder, XFBackground, XFProtection + xf = XF() + xf.alignment = XFAlignment() + xf.alignment.indent_level = 0 + xf.alignment.shrink_to_fit = 0 + xf.alignment.text_direction = 0 + xf.border = XFBorder() + xf.border.diag_up = 0 + xf.border.diag_down = 0 + xf.border.diag_colour_index = 0 + xf.border.diag_line_style = 0 # no line + xf.background = XFBackground() + xf.protection = XFProtection() + (prot_bits, font_and_format, halign_etc) = unpack('> 6 + upkbits(xf.protection, prot_bits, ( + (6, 0x40, 'cell_locked'), + (7, 0x80, 'formula_hidden'), + )) + xf.alignment.hor_align = halign_etc & 0x07 + for mask, side in ((0x08, 'left'), (0x10, 'right'), (0x20, 'top'), (0x40, 'bottom')): + if halign_etc & mask: + colour_index, line_style = 8, 1 # black, thin + else: + colour_index, line_style = 0, 0 # none, none + setattr(xf.border, side + '_colour_index', colour_index) + setattr(xf.border, side + '_line_style', line_style) + bg = xf.background + if halign_etc & 0x80: + bg.fill_pattern = 17 + else: + bg.fill_pattern = 0 + bg.background_colour_index = 9 # white + bg.pattern_colour_index = 8 # black + xf.parent_style_index = 0 # ??????????? + xf.alignment.vert_align = 2 # bottom + xf.alignment.rotation = 0 + for attr_stem in ("format", "font", "alignment", "border", \ + "background", "protection"): + attr = "_%s_flag" % attr_stem + setattr(xf, attr, 1) + return xf + + def req_fmt_info(self): + if not self.formatting_info: + raise XLRDError("Feature requires open_workbook(..., formatting_info=True)") + + # Determine column display width. + # - New in version 0.6.1 + # + # @param colx Index of the queried column, range 0 to 255. 
+ # Note that it is possible to find out the width that will be used to display + # columns with no cell information e.g. column IV (colx=255). + # @return The column width that will be used for displaying + # the given column by Excel, in units of 1/256th of the width of a + # standard character (the digit zero in the first font). + + def computed_column_width(self, colx): + self.req_fmt_info() + if self.biff_version >= 80: + colinfo = self.colinfo_map.get(colx, None) + if colinfo is not None: + return colinfo.width + if self.standardwidth is not None: + return self.standardwidth + elif self.biff_version >= 40: + if self.gcw[colx]: + if self.standardwidth is not None: + return self.standardwidth + else: + colinfo = self.colinfo_map.get(colx, None) + if colinfo is not None: + return colinfo.width + elif self.biff_version == 30: + colinfo = self.colinfo_map.get(colx, None) + if colinfo is not None: + return colinfo.width + # All roads lead to Rome and the DEFCOLWIDTH ... + if self.defcolwidth is not None: + return self.defcolwidth * 256 + return 8 * 256 # 8 is what Excel puts in a DEFCOLWIDTH record + + def handle_msodrawingetc(self, recid, data_len, data): + if not OBJ_MSO_DEBUG: + return + DEBUG = 1 + if self.biff_version < 80: + return + o = MSODrawing() + pos = 0 + while pos < data_len: + tmp, fbt, cb = unpack('> 4) & 0xFFF + if ver == 0xF: + ndb = 0 # container + else: + ndb = cb + if DEBUG: + hex_char_dump(data, pos, ndb + 8, base=0, fout=self.logfile) + fprintf(self.logfile, + "fbt:0x%04X inst:%d ver:0x%X cb:%d (0x%04X)\n", + fbt, inst, ver, cb, cb) + if fbt == 0xF010: # Client Anchor + assert ndb == 18 + (o.anchor_unk, + o.anchor_colx_lo, o.anchor_rowx_lo, + o.anchor_colx_hi, o.anchor_rowx_hi) = unpack('> 1) & 1 + # Docs say NULL [sic] bytes padding between string count and string data + # to ensure that string is word-aligned. Appears to be nonsense. + # There also seems to be a random(?) byte after the string (not counted in the + # string length. 
+ o.original_author, endpos = unpack_unicode_update_pos(data, 8, lenlen=2) + assert endpos == data_len - 1 + o.last_byte = data[-1] + if DEBUG: + o.dump(self.logfile, header="=== MSNote ===", footer= " ") + + def handle_txo(self, data): + if not OBJ_MSO_DEBUG: + return + DEBUG = 1 + if self.biff_version < 80: + return + o = MSTxo() + data_len = len(data) + option_flags, o.rot, cchText, cbRuns = unpack('>= 2 # div by 4 to drop the 2 flag bits + if flags & 1: + return i / 100.0 + return float(i) + else: + # It's the most significant 30 bits of an IEEE 754 64-bit FP number + # (to_py3): replaced b'\0\0\0\0' + chr(flags & 252) + rk_str[1:4] + _bytes = array('B', b'\0\0\0\0') + _bytes.append(flags & 252) + _bytes.extend(rk_str[1:4]) + d, = unpack(' see XFCell() class in the xfcell module. +class Cell(XFCell): + def __repr__(self): + if not self.has_xf: + return "%s:%r" % (ctype_text[self.ctype], self.value) + else: + return "%s:%r (XF:%r)" % (ctype_text[self.ctype], self.value, self.xf_index) + +# There is one and only one instance of an empty cell -- it's a singleton. This is it. +# You may use a test like "acell is empty_cell". +empty_cell = Cell(XL_CELL_EMPTY, '') + +##### =============== Colinfo and Rowinfo ============================== ##### + + +# Width and default formatting information that applies to one or +# more columns in a sheet. Derived from COLINFO records. +# +# +# Here is the default hierarchy for width, according to the OOo docs: +# +# In BIFF3, if a COLINFO record is missing for a column, +# the width specified in the record DEFCOLWIDTH is used instead. +# +# In BIFF4-BIFF7, the width set in this [COLINFO] record is only used, +# if the corresponding bit for this column is cleared in the GCW +# record, otherwise the column width set in the DEFCOLWIDTH record +# is used (the STANDARDWIDTH record is always ignored in this case [see footnote!]). 
+# +# In BIFF8, if a COLINFO record is missing for a column, +# the width specified in the record STANDARDWIDTH is used. +# If this [STANDARDWIDTH] record is also missing, +# the column width of the record DEFCOLWIDTH is used instead. +# +# Footnote: The docs on the GCW record say this: +# +# If a bit is set, the corresponding column uses the width set in the STANDARDWIDTH +# record. If a bit is cleared, the corresponding column uses the width set in the +# COLINFO record for this column. +# +# If a bit is set, and the worksheet does not contain the STANDARDWIDTH record, or if +# the bit is cleared, and the worksheet does not contain the COLINFO record, the DEFCOLWIDTH +# record of the worksheet will be used instead. +# +# At the moment (2007-01-17) xlrd is going with the GCW version of the story. +# Reference to the source may be useful: see the computed_column_width(colx) method +# of the Sheet class. +# - New in version 0.6.1 + +class Colinfo(BaseObject): + # Width of the column in 1/256 of the width of the zero character, + # using default font (first FONT record in the file). + width = 0 + # XF index to be used for formatting empty cells. + xf_index = -1 + # 1 = column is hidden + hidden = 0 + # Value of a 1-bit flag whose purpose is unknown + # but is often seen set to 1 + bit1_flag = 0 + # Outline level of the column, in range(7). + # (0 = no outline) + outline_level = 0 + # 1 = column is collapsed + collapsed = 0 + +# Height and default formatting information that applies to a row in a sheet. +# Derived from ROW records. +# - New in version 0.6.1 + +class Rowinfo(BaseObject): + ## + # Height of the row, in twips. 
One twip == 1/20 of a point + height = 0 + ## + # 0 = Row has custom height; 1 = Row has default height + has_default_height = 0 + ## + # Outline level of the row + outline_level = 0 + ## + # 1 = Outline group starts or ends here (depending on where the + # outline buttons are located, see WSBOOL record [TODO ??]), + # and is collapsed + outline_group_starts_ends = 0 + ## + # 1 = Row is hidden (manually, or by a filter or outline group) + hidden = 0 + ## + # 1 = Row height and default font height do not match + height_mismatch = 0 + ## + # 1 = the xf_index attribute is usable; 0 = ignore it + has_default_xf_index = 0 + ## + # Index to default XF record for empty cells in this row. + # Don't use this if has_default_xf_index == 0. + xf_index = -9999 + ## + # This flag is set, if the upper border of at least one cell in this row + # or if the lower border of at least one cell in the row above is + # formatted with a thick line style. Thin and medium line styles are not + # taken into account. + additional_space_above = 0 + ## + # This flag is set, if the lower border of at least one cell in this row + # or if the upper border of at least one cell in the row below is + # formatted with a medium or thick line style. Thin line styles are not + # taken into account. + additional_space_below = 0 diff --git a/tablib/packages/xlrd3/xfcell.py b/tablib/packages/xlrd3/xfcell.py new file mode 100644 index 0000000..213b4bd --- /dev/null +++ b/tablib/packages/xlrd3/xfcell.py @@ -0,0 +1,276 @@ +# Author: mozman +# Purpose: xfcell -- cell with convenient xf function +# Created: 04.12.2010 +# Copyright (C) 2010, Manfred Moitzi +# License: BSD-style licence + +""" +The XFCell() object contains the data for one cell. + +WARNING: You don't call this class yourself. You access Cell objects +via methods of the Sheet object(s) that you found in the Book object that +was returned when you called xlrd.open_workbook("myfile.xls"). 
+ +Cell objects have four attributes: `ctype` is an int, `value` (which depends +on `ctype`), `xf_index` and `sheet`, a reference to the containing sheet. If +**formatting_info** is not enabled when the workbook is opened, xf_index will +be **None**. + +The following table describes the types of cells and how their values +are represented in Python. + +=============== ===== ============ ========================================== +Type symbol Const Python value Note +=============== ===== ============ ========================================== +XL_CELL_EMPTY 0 "" +XL_CELL_TEXT 1 str +XL_CELL_NUMBER 2 float +XL_CELL_DATE 3 float +XL_CELL_BOOLEAN 4 int 1 means TRUE, 0 means FALSE +XL_CELL_ERROR 5 int representing internal Excel codes; for a + text representation, refer to the supplied + dictionary error_text_from_code +XL_CELL_BLANK 6 "" this type will appear only when + open_workbook(..., formatting_info=True) + is used. +=============== ===== ============ ========================================== +""" + +import datetime + +from .xldate import xldate_as_tuple +from .biffh import XL_CELL_DATE, BaseObject + +class XFCell(BaseObject): + """ Extended Cell() class with convenient methods for easy access of cell + properties. + """ + __slots__ = ['sheet', 'ctype', 'value', 'xf'] + + def __init__(self, ctype, value, xf_index=None, sheet=None): + self.sheet = sheet + self.ctype = ctype + self.value = value + + if xf_index is not None: + self.xf = self.book.xf_list[xf_index] + else: + self.xf = None + + @property + def book(self): + return self.sheet.book + + @property + def has_xf(self): + return (self.xf is not None) + + @property + def xf_index(self): + if self.has_xf: + return self.xf.xf_index + else: + return None + + @property + def parent_style(self): + return self.book.xf_list[self.xf.parent_style_index] + + @property + def is_datetime(self): + return self.ctype == XL_CELL_DATE + + @property + def has_date(self): + if self.is_datetime: + return self.value >= 1.
+ return False + + def get_color(self, index): + return self.book.colour_map[index] + + def datetime(self): + """ Returns a datetime.datetime object if cell type is XL_CELL_DATE + else raises a TypeError, and raises ValueError if the cell has + no date value (only a time value is present). + """ + if self.is_datetime: + if self.has_date: + date = xldate_as_tuple(self.value, self.book.datemode) + return datetime.datetime(*date) + else: + raise ValueError("Cell has no date value.") + else: + raise TypeError("Cell is not a XL_CELL_DATE.") + + def date(self): + """ Returns a datetime.date object if cell type is XL_CELL_DATE + else raises a **TypeError**. Raises **ValueError** if the cell + doesn't have a date value (only time value is present). + """ + dt = self.datetime() + return dt.date() + + def time(self): + """ Returns a datetime.time object if cell type is XL_CELL_DATE else + raises a TypeError. + """ + if self.is_datetime: + date = xldate_as_tuple(self.value, self.book.datemode) + return datetime.time(date[3], date[4], date[5]) + else: + raise TypeError("Cell is not a XL_CELL_DATE.") + + # + # access the XFBackground() class + # + + @property + def background(self): + if self.xf.is_style and \ + self.xf._background_flag == 0: + return self.xf.background + elif self.xf._background_flag: + return self.xf.background + else: + return self.parent_style.background + + def background_color(self): + """ Get cell background-color as 3-tuple.
""" + color_index = self.background.background_colour_index + return self.get_color(color_index) + + def fill_pattern(self): + return self.background.fill_pattern + + def pattern_color(self): + color_index = self.background.pattern_colour_index + return self.get_color(color_index) + + # + # access the Font() class + # + + @property + def font_index(self): + if self.xf.is_style and \ + self.xf._font_flag == 0: + return self.xf.font_index + elif self.xf._font_flag: + return self.xf.font_index + else: + return self.parent_style.font_index + + @property + def font(self): + """ Get the Font() object selected by the font_index property. """ + return self.book.font_list[self.font_index] + + def font_color(self): + """ Get cell foreground-color as 3-tuple. """ + return self.get_color(self.font.colour_index) + + # + # access the Format() class + # + + @property + def format_key(self): + if self.xf.is_style and \ + self.xf._format_flag == 0: + return self.xf.format_key + elif self.xf._format_flag: + return self.xf.format_key + else: + return self.parent_style.format_key + + @property + def format(self): + """ Get the Format() class. """ + return self.book.format_map[self.format_key] + + def format_str(self): + """ Get the associated 'format_str'. """ + return self.format.format_str + + # + # access the XFAlignment() class + # + + @property + def alignment(self): + if self.xf.is_style and \ + self.xf._alignment_flag == 0: + return self.xf.alignment + elif self.xf._alignment_flag: + return self.xf.alignment + else: + return self.parent_style.alignment + + # + # access the XFBorder() class + # + + @property + def border(self): + if self.xf.is_style and \ + self.xf._border_flag == 0: + return self.xf.border + elif self.xf._border_flag: + return self.xf.border + else: + return self.parent_style.border + + def bordercolors(self): + """ Get border color as dict of rgb-color-tuples.
""" + border = self.border + return { + 'top': self.get_color(border.top_colour_index), + 'bottom': self.get_color(border.bottom_colour_index), + 'left': self.get_color(border.left_colour_index), + 'right': self.get_color(border.right_colour_index), + 'diag': self.get_color(border.diag_colour_index), + } + + def borderstyles(self): + """ Get border styles as dict of ints. """ + border = self.border + return { + 'top': border.top_line_style, + 'bottom': border.bottom_line_style, + 'left': border.left_line_style, + 'right': border.right_line_style, + 'diag': border.diag_line_style, + } + + @property + def has_up_diag(self): + """ Draw a line across the cell from bottom left to top right. """ + return bool(self.border.diag_up) + + @property + def has_down_diag(self): + """ Draw a line across the cell from top left to bottom right. """ + return bool(self.border.diag_down) + + # + # access the XFProtection() class + # + + @property + def protection(self): + if self.xf.is_style and \ + self.xf._protection_flag == 0: + return self.xf.protection + elif self.xf._protection_flag: + return self.xf.protection + else: + return self.parent_style.protection + + @property + def is_cell_locked(self): + return bool(self.protection.cell_locked) + + @property + def is_formula_hidden(self): + return bool(self.protection.formula_hidden) diff --git a/tablib/packages/xlrd3/xfconst.py b/tablib/packages/xlrd3/xfconst.py new file mode 100644 index 0000000..24f6533 --- /dev/null +++ b/tablib/packages/xlrd3/xfconst.py @@ -0,0 +1,84 @@ +# Author: mozman +# Purpose: xfconst -- xf constants +# Created: 05.12.2010 +# Copyright (C) 2010, Manfred Moitzi +# License: BSD-style licence + +# excelfileformat section 5.115.1 pg. 219 +HOR_ALIGN_GENERAL = 0 +HOR_ALIGN_LEFT = 1 +HOR_ALIGN_CENTRED = 2 +HOR_ALIGN_RIGHT = 3 +HOR_ALIGN_FILLED = 4 +HOR_ALIGN_JUSTIFIED = 5 +HOR_ALIGN_CENTRED_ACROSS_SELECTION = 6 +HOR_ALIGN_DISTRIBUTED = 7 + +# excelfileformat section 5.115.1 pg.
220 +VERT_ALIGN_TOP = 0 +VERT_ALIGN_CENTRED = 1 +VERT_ALIGN_BOTTOM = 2 +VERT_ALIGN_JUSTIFIED = 3 +VERT_ALIGN_DISTRIBUTED = 4 + +# excelfileformat section 5.115.1 pg. 220 +ORIENTATION_NONE = 0 +ORIENTATION_STACKED = 1 +ORIENTATION_90_COUNTERCLOCKWISE = 2 +ORIENTATION_90_CLOCKWISE = 3 + +# excelfileformat section 5.115.1 pg. 220 +ROTATION_NONE = 0 +ROTATION_STACKED = 255 +# other values: +# Value Description +# ======= ================================= +# 1-90 1 to 90 degrees counterclockwise +# 91-180 1 to 90 degrees clockwise + +# excelfileformat section 2.5.11 Line Styles for Cell Borders pg. 24 +LS_NOLINE = 0 + +# solid line +LS_THIN = 1 + +# solid line +LS_MEDIUM = 2 + +# - - - - - - - - - +LS_DASHED = 3 + +# ................. +LS_DOTTED = 4 + +# solid line +LS_THICK = 5 + +# ================= +LS_DOUBLE = 6 + +# very thin dotted +LS_HAIR = 7 + +# - - - - - - - - - +LS_MEDIUM_DASHED = 8 + +# - . - . - . - . - +LS_THIN_DASH_DOTTED = 9 + +# - . - . - . - . - +LS_MEDIUM_DASH_DOTTED = 10 + +# - .. - .. - .. - .. - +LS_THIN_DASH_DOT_DOTTED = 11 + +# - .. - .. - .. - .. - +LS_MEDIUM_DASH_DOT_DOTTED = 12 + +# \\\ . \\\ . \\\ . \\\ . \\\ +LS_SLANTED_MEDIUM_DASH_DOTTED = 13 + + + + + diff --git a/tablib/packages/xlrd3/xldate.py b/tablib/packages/xlrd3/xldate.py new file mode 100644 index 0000000..634709d --- /dev/null +++ b/tablib/packages/xlrd3/xldate.py @@ -0,0 +1,167 @@ +# No part of the content of this file was derived from the works of David Giffin. +# +# Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd +# This module is part of the xlrd3 package, which is released under a +# BSD-style licence. +# +# Provides function(s) for dealing with Microsoft Excel ™ dates. +# +# 2008-10-18 SJM Fix bug in xldate_from_date_tuple (affected some years after 2099) +# +# The conversion from days to (year, month, day) starts with +# an integral "julian day number" aka JDN. +# FWIW, JDN 0 corresponds to noon on Monday November 24 in Gregorian year -4713. 
+# More importantly: +# Noon on Gregorian 1900-03-01 (day 61 in the 1900-based system) is JDN 2415080.0 +# Noon on Gregorian 1904-01-02 (day 1 in the 1904-based system) is JDN 2416482.0 + +def ifd(x, y): + return divmod(x, y)[0] + +_JDN_delta = (2415080 - 61, 2416482 - 1) +assert _JDN_delta[1] - _JDN_delta[0] == 1462 + +class XLDateError(ValueError): pass + +class XLDateNegative(XLDateError): pass +class XLDateAmbiguous(XLDateError): pass +class XLDateTooLarge(XLDateError): pass +class XLDateBadDatemode(XLDateError): pass +class XLDateBadTuple(XLDateError): pass + +_XLDAYS_TOO_LARGE = (2958466, 2958466 - 1462) # This is equivalent to 10000-01-01 + +# Convert an Excel number (presumed to represent a date, a datetime or a time) into +# a tuple suitable for feeding to datetime or mx.DateTime constructors. +# @param xldate The Excel number +# @param datemode 0: 1900-based, 1: 1904-based. +#
WARNING: when using this function to +# interpret the contents of a workbook, you should pass in the Book.datemode +# attribute of that workbook. Whether +# the workbook has ever been anywhere near a Macintosh is irrelevant. +# @return Gregorian (year, month, day, hour, minute, nearest_second). +#
Special case: if 0.0 <= xldate < 1.0, it is assumed to represent a time; +# (0, 0, 0, hour, minute, second) will be returned. +#
Note: 1904-01-01 is not regarded as a valid date in the datemode 1 system; its "serial number" +# is zero. +# @throws XLDateNegative xldate < 0.00 +# @throws XLDateAmbiguous The 1900 leap-year problem (datemode == 0 and 1.0 <= xldate < 61.0) +# @throws XLDateTooLarge Gregorian year 10000 or later +# @throws XLDateBadDatemode datemode arg is neither 0 nor 1 +# @throws XLDateError Covers the 4 specific errors + +def xldate_as_tuple(xldate, datemode): + if datemode not in (0, 1): + raise XLDateBadDatemode(datemode) + if xldate == 0.00: + return (0, 0, 0, 0, 0, 0) + if xldate < 0.00: + raise XLDateNegative(xldate) + xldays = int(xldate) + frac = xldate - xldays + seconds = int(round(frac * 86400.0)) + assert 0 <= seconds <= 86400 + if seconds == 86400: + hour = minute = second = 0 + xldays += 1 + else: + # second = seconds % 60; minutes = seconds // 60 + minutes, second = divmod(seconds, 60) + # minute = minutes % 60; hour = minutes // 60 + hour, minute = divmod(minutes, 60) + if xldays >= _XLDAYS_TOO_LARGE[datemode]: + raise XLDateTooLarge(xldate) + + if xldays == 0: + return (0, 0, 0, hour, minute, second) + + if xldays < 61 and datemode == 0: + raise XLDateAmbiguous(xldate) + + jdn = xldays + _JDN_delta[datemode] + yreg = (ifd(ifd(jdn * 4 + 274277, 146097) * 3, 4) + jdn + 1363) * 4 + 3 + mp = ifd(yreg % 1461, 4) * 535 + 333 + d = ifd(mp % 16384, 535) + 1 + # mp /= 16384 + mp >>= 14 + if mp >= 10: + return (ifd(yreg, 1461) - 4715, mp - 9, d, hour, minute, second) + else: + return (ifd(yreg, 1461) - 4716, mp + 3, d, hour, minute, second) + +# === conversions from date/time to xl numbers + +def _leap(y): + if y % 4: return 0 + if y % 100: return 1 + if y % 400: return 0 + return 1 + +_days_in_month = (None, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) + +# Convert a date tuple (year, month, day) to an Excel date. +# @param year Gregorian year. 
+# @param month 1 <= month <= 12 +# @param day 1 <= day <= last day of that (year, month) +# @param datemode 0: 1900-based, 1: 1904-based. +# @throws XLDateAmbiguous The 1900 leap-year problem (datemode == 0 and 1.0 <= xldate < 61.0) +# @throws XLDateBadDatemode datemode arg is neither 0 nor 1 +# @throws XLDateBadTuple (year, month, day) is too early/late or has invalid component(s) +# @throws XLDateError Covers the specific errors + +def xldate_from_date_tuple(datetuple, datemode): + + (year, month, day) = datetuple + if datemode not in (0, 1): + raise XLDateBadDatemode(datemode) + + if year == 0 and month == 0 and day == 0: + return 0.00 + + if not (1900 <= year <= 9999): + raise XLDateBadTuple("Invalid year: %r" % ((year, month, day),)) + if not (1 <= month <= 12): + raise XLDateBadTuple("Invalid month: %r" % ((year, month, day),)) + if day < 1 \ + or (day > _days_in_month[month] and not(day == 29 and month == 2 and _leap(year))): + raise XLDateBadTuple("Invalid day: %r" % ((year, month, day),)) + + Yp = year + 4716 + M = month + if M <= 2: + Yp = Yp - 1 + Mp = M + 9 + else: + Mp = M - 3 + jdn = ifd(1461 * Yp, 4) + ifd(979 * Mp + 16, 32) + \ + day - 1364 - ifd(ifd(Yp + 184, 100) * 3, 4) + xldays = jdn - _JDN_delta[datemode] + if xldays <= 0: + raise XLDateBadTuple("Invalid (year, month, day): %r" % ((year, month, day),)) + if xldays < 61 and datemode == 0: + raise XLDateAmbiguous("Before 1900-03-01: %r" % ((year, month, day),)) + return float(xldays) + +# Convert a time tuple (hour, minute, second) to an Excel "date" value (fraction of a day). 
+# @param hour 0 <= hour < 24 +# @param minute 0 <= minute < 60 +# @param second 0 <= second < 60 +# @throws XLDateBadTuple Out-of-range hour, minute, or second + +def xldate_from_time_tuple(timetuple): + (hour, minute, second) = timetuple + if 0 <= hour < 24 and 0 <= minute < 60 and 0 <= second < 60: + return ((second / 60.0 + minute) / 60.0 + hour) / 24.0 + raise XLDateBadTuple("Invalid (hour, minute, second): %r" % ((hour, minute, second),)) + +# Convert a datetime tuple (year, month, day, hour, minute, second) to an Excel date value. +# For more details, refer to other xldate_from_*_tuple functions. +# @param datetime_tuple (year, month, day, hour, minute, second) +# @param datemode 0: 1900-based, 1: 1904-based. + +def xldate_from_datetime_tuple(datetime_tuple, datemode): + return ( + xldate_from_date_tuple(datetime_tuple[:3], datemode) + + + xldate_from_time_tuple(datetime_tuple[3:]) + ) diff --git a/test_tablib.py b/test_tablib.py index aab1b0f..d270085 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -657,7 +657,7 @@ class TablibTestCase(unittest.TestCase): self.founders.append(('Old', 'Man', 100500)) self.assertEquals( - u""" + """ first_name|last_name |gpa ----------|----------|------ John |Adams |90 From 15435047c668aa8e1fb5ffc1a8db30e5b944d5fd Mon Sep 17 00:00:00 2001 From: Marc Abramowitz Date: Tue, 15 May 2012 07:20:04 -0700 Subject: [PATCH 5/5] Add myself to AUTHORS --- AUTHORS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 27fe615..8ad926c 100644 --- a/AUTHORS +++ b/AUTHORS @@ -19,4 +19,5 @@ Patches and Suggestions - Mark Walling - Mike Waldner - Joel Friedly -- Jakub Janoszek \ No newline at end of file +- Jakub Janoszek +- Marc Abramowitz