summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Frédéric Péters <fpeters@entrouvert.com>, 2015-04-03 10:36:40 (GMT)
committer: Frédéric Péters <fpeters@entrouvert.com>, 2015-04-03 10:36:40 (GMT)
commit: a3deae358084e5a4e1a2c0303d58aa47ae99574b (patch)
tree: e6dfb880eb44d88bb1965fc76a6edcfe44e6f61a
parent: a7a432a6681122ae5cf09e8d1e2b0db50d0cb0fd (diff)
download: tabellioOOo-a3deae358084e5a4e1a2c0303d58aa47ae99574b.zip
tabellioOOo-a3deae358084e5a4e1a2c0303d58aa47ae99574b.tar.gz
tabellioOOo-a3deae358084e5a4e1a2c0303d58aa47ae99574b.tar.bz2
odf2legi: parse static list styles (#6899)
The styles.xml file is supposed to be static, but it can actually change in some situations; we therefore cannot rely on known style names for lists and have to parse the various list levels to discover their formats.
-rw-r--r-- odf2legi/odf2legi.py 72
1 files changed, 54 insertions, 18 deletions
diff --git a/odf2legi/odf2legi.py b/odf2legi/odf2legi.py
index 371dfa4..68d417b 100644
--- a/odf2legi/odf2legi.py
+++ b/odf2legi/odf2legi.py
@@ -457,28 +457,32 @@ def handle_paragraph(parent, elem):
t_emph._children, para._children = para.getchildren(), [t_emph]
def handle_list(parent, elem):
-
style = elem.attrib.get('{%s}style-name' % TEXT_NS)
level = 1
+ style_props = None
+
+ ancestor = elem
+ ancestorstyle = None
+ while True:
+ try:
+ gdparent = ancestor.parent.parent
+ except AttributeError:
+ break
+ if gdparent.tag != '{%s}list' % TEXT_NS:
+ break
+ level += 1
+ ancestor = gdparent
+ if ancestorstyle is None:
+ ancestorstyle = ancestor.attrib.get('{%s}style-name' % TEXT_NS)
+
if style is None:
- # means our parent was also a list
- style_props = None
- ancestor = elem
- while True:
- try:
- gdparent = ancestor.parent.parent
- except AttributeError:
- break
- if gdparent.tag != '{%s}list' % TEXT_NS:
- break
- level += 1
- ancestor = gdparent
- ancestorstyle = ancestor.attrib.get('{%s}style-name' % TEXT_NS)
- style_props = STYLES.get('LIST:' + ancestorstyle)
+ if ancestorstyle:
+ style_props = STYLES.get('LIST:' + ancestorstyle)
else:
if 'parent' in STYLES.get(style, {}):
style = STYLES.get(style).get('parent')
style_props = STYLES.get('LIST:' + style)
+
num_format = None
if style_props:
num_format = style_props.get('levels', {}).get(level, {}).get('format')
@@ -788,11 +792,13 @@ def convert(input_filename, output_filename):
content = z.read(zfile)
elif zfile == 'meta.xml':
metadata = z.read(zfile)
+ elif zfile == 'styles.xml':
+ styles = z.read(zfile)
if content and metadata:
break
- legi = convert_to_legi_xml(content, metadata)
+ legi = convert_to_legi_xml(content, metadata, styles)
if debug:
print legi
@@ -814,6 +820,31 @@ def convert(input_filename, output_filename):
legiz.writestr(zi, z.read(zfile))
legiz.close()
+def parse_static_styles(content_tree):
+ '''
+ Parse styles from styles.xml
+ '''
+ global STYLES
+
+ for elem in content_tree.findall('{%s}styles/{%s}list-style' % (OFFICE_NS, TEXT_NS)):
+ style_name = 'LIST:%s' % elem.attrib.get('{%s}name' % STYLE_NS)
+ STYLES[style_name] = {'levels': {}}
+ for level in elem.findall('{%s}list-level-style-number' % TEXT_NS):
+ num_level = level.attrib.get('{%s}level' % TEXT_NS)
+ num_format = level.attrib.get('{%s}num-format' % STYLE_NS)
+ STYLES[style_name]['levels'][int(num_level)] = {
+ 'format': num_format,
+ }
+ for level in elem.findall('{%s}list-level-style-bullet' % TEXT_NS):
+ num_level = level.attrib.get('{%s}level' % TEXT_NS)
+ bullet_char = level.attrib.get('{%s}bullet-char' % TEXT_NS)
+ if not STYLES[style_name]['levels'].get(int(num_level)):
+ STYLES[style_name]['levels'][int(num_level)] = {}
+ STYLES[style_name]['levels'][int(num_level)].update({
+ 'bullet': bullet_char,
+ })
+
+
def parse_automatic_styles(content_tree):
'''
Parse styles created automatically and populate the global styles
@@ -870,7 +901,8 @@ def parse_automatic_styles(content_tree):
# parse automatic list styles
for elem in content_tree.findall('{%s}automatic-styles/{%s}list-style' % (OFFICE_NS, TEXT_NS)):
style_name = 'LIST:%s' % elem.attrib.get('{%s}name' % STYLE_NS)
- STYLES[style_name] = {'levels': {}}
+ if not style_name in STYLES:
+ STYLES[style_name] = {'levels': {}}
for level in elem.findall('{%s}list-level-style-number' % TEXT_NS):
num_level = level.attrib.get('{%s}level' % TEXT_NS)
num_format = level.attrib.get('{%s}num-format' % STYLE_NS)
@@ -887,7 +919,7 @@ def parse_automatic_styles(content_tree):
})
-def convert_to_legi_xml(content, metadata = None):
+def convert_to_legi_xml(content, metadata=None, styles=None):
'''
Convert a content.xml/metadata.xml pair from an odt file
to the legi XML format.
@@ -907,6 +939,10 @@ def convert_to_legi_xml(content, metadata = None):
metadata_element = ET.SubElement(legi, 'metadata')
create_metadata(metadata_element, metadata_tree, content_tree)
+ if styles:
+ styles_tree = ET.ElementTree(ET.fromstring(styles))
+ parse_static_styles(styles_tree)
+
current_top = [legi]
current_legi = []
speech = None