From 21c11b9911a0141602444265a34219c16c152067 Mon Sep 17 00:00:00 2001 From: Jim Dalton Date: Thu, 10 May 2012 11:14:17 -0700 Subject: [PATCH 1/2] Fix UnicodeError in HTML output * Alter `test_unicode_append` so that actual unicode characters outside the ASCII bytestring range are tested. * Make sure output of `render` in markup.py is unicode * Add wrapper around output of `export_set` in _html.py so that unicode characters are output. --- tablib/formats/_html.py | 5 ++++- tablib/packages/markup.py | 12 ++++++------ test_tablib.py | 2 +- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/tablib/formats/_html.py b/tablib/formats/_html.py index ffde95f..222c589 100644 --- a/tablib/formats/_html.py +++ b/tablib/formats/_html.py @@ -11,6 +11,7 @@ if sys.version_info[0] > 2: from tablib.packages import markup3 as markup else: from cStringIO import StringIO + import codecs from tablib.packages import markup import tablib @@ -45,7 +46,9 @@ def export_set(dataset): page.table.close() - stream.writelines(str(page)) + # Allow unicode characters in output + wrapper = codecs.getwriter("utf8")(stream) + wrapper.writelines(unicode(page)) return stream.getvalue() diff --git a/tablib/packages/markup.py b/tablib/packages/markup.py index 234f116..0198899 100644 --- a/tablib/packages/markup.py +++ b/tablib/packages/markup.py @@ -67,7 +67,7 @@ class element: def render( self, tag, single, between, kwargs ): """Append the actual tags to content.""" - out = "<%s" % tag + out = u"<%s" % tag for key, value in kwargs.iteritems( ): if value is not None: # when value is None that means stuff like <... checked> key = key.strip('_') # strip this so class_ will mean class, etc. 
@@ -75,16 +75,16 @@ class element: key = 'http-equiv' elif key == 'accept_charset': key = 'accept-charset' - out = "%s %s=\"%s\"" % ( out, key, escape( value ) ) + out = u"%s %s=\"%s\"" % ( out, key, escape( value ) ) else: - out = "%s %s" % ( out, key ) + out = u"%s %s" % ( out, key ) if between is not None: - out = "%s>%s</%s>" % ( out, between, tag ) + out = u"%s>%s</%s>" % ( out, between, tag ) else: if single: - out = "%s />" % out + out = u"%s />" % out else: - out = "%s>" % out + out = u"%s>" % out if self.parent is not None: self.parent.content.append( out ) else: diff --git a/test_tablib.py b/test_tablib.py index aab1b0f..15a6bec 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -308,7 +308,7 @@ class TablibTestCase(unittest.TestCase): def test_unicode_append(self): """Passes in a single unicode charecter and exports.""" - new_row = ('å', 'é') + new_row = (u'å', u'é') data.append(new_row) data.json From 71603662b1d81a0fa9566cf68cb3fc5a584bdffd Mon Sep 17 00:00:00 2001 From: Jim Dalton Date: Thu, 10 May 2012 11:29:41 -0700 Subject: [PATCH 2/2] Make sure codecs module loaded for all versions of Python --- tablib/formats/_html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tablib/formats/_html.py b/tablib/formats/_html.py index 222c589..2a76638 100644 --- a/tablib/formats/_html.py +++ b/tablib/formats/_html.py @@ -11,10 +11,10 @@ if sys.version_info[0] > 2: from tablib.packages import markup3 as markup else: from cStringIO import StringIO - import codecs from tablib.packages import markup import tablib +import codecs BOOK_ENDINGS = 'h3'