From a3deae358084e5a4e1a2c0303d58aa47ae99574b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20P=C3=A9ters?= Date: Fri, 3 Apr 2015 12:36:40 +0200 Subject: [PATCH] odf2legi: parse static list styles (#6899) The styles.xml file is supposed to be static but it can actually change in some situations; we cannot therefore rely on known style names for lists and have to parse the various levels to discover their formats. --- odf2legi/odf2legi.py | 72 +++++++++++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 18 deletions(-) diff --git a/odf2legi/odf2legi.py b/odf2legi/odf2legi.py index 371dfa4..68d417b 100644 --- a/odf2legi/odf2legi.py +++ b/odf2legi/odf2legi.py @@ -457,28 +457,32 @@ def handle_paragraph(parent, elem): t_emph._children, para._children = para.getchildren(), [t_emph] def handle_list(parent, elem): - style = elem.attrib.get('{%s}style-name' % TEXT_NS) level = 1 + style_props = None + + ancestor = elem + ancestorstyle = None + while True: + try: + gdparent = ancestor.parent.parent + except AttributeError: + break + if gdparent.tag != '{%s}list' % TEXT_NS: + break + level += 1 + ancestor = gdparent + if ancestorstyle is None: + ancestorstyle = ancestor.attrib.get('{%s}style-name' % TEXT_NS) + if style is None: - # means our parent was also a list - style_props = None - ancestor = elem - while True: - try: - gdparent = ancestor.parent.parent - except AttributeError: - break - if gdparent.tag != '{%s}list' % TEXT_NS: - break - level += 1 - ancestor = gdparent - ancestorstyle = ancestor.attrib.get('{%s}style-name' % TEXT_NS) - style_props = STYLES.get('LIST:' + ancestorstyle) + if ancestorstyle: + style_props = STYLES.get('LIST:' + ancestorstyle) else: if 'parent' in STYLES.get(style, {}): style = STYLES.get(style).get('parent') style_props = STYLES.get('LIST:' + style) + num_format = None if style_props: num_format = style_props.get('levels', {}).get(level, {}).get('format') @@ -788,11 +792,13 @@ def convert(input_filename, output_filename): content = z.read(zfile) elif zfile == 'meta.xml': metadata = z.read(zfile) + elif zfile == 'styles.xml': + styles = z.read(zfile) if content and metadata: break - legi = convert_to_legi_xml(content, metadata) + legi = convert_to_legi_xml(content, metadata, styles) if debug: print legi @@ -814,6 +820,31 @@ def convert(input_filename, output_filename): legiz.writestr(zi, z.read(zfile)) legiz.close() +def parse_static_styles(content_tree): + ''' + Parse styles from styles.xml + ''' + global STYLES + + for elem in content_tree.findall('{%s}styles/{%s}list-style' % (OFFICE_NS, TEXT_NS)): + style_name = 'LIST:%s' % elem.attrib.get('{%s}name' % STYLE_NS) + STYLES[style_name] = {'levels': {}} + for level in elem.findall('{%s}list-level-style-number' % TEXT_NS): + num_level = level.attrib.get('{%s}level' % TEXT_NS) + num_format = level.attrib.get('{%s}num-format' % STYLE_NS) + STYLES[style_name]['levels'][int(num_level)] = { + 'format': num_format, + } + for level in elem.findall('{%s}list-level-style-bullet' % TEXT_NS): + num_level = level.attrib.get('{%s}level' % TEXT_NS) + bullet_char = level.attrib.get('{%s}bullet-char' % TEXT_NS) + if not STYLES[style_name]['levels'].get(int(num_level)): + STYLES[style_name]['levels'][int(num_level)] = {} + STYLES[style_name]['levels'][int(num_level)].update({ + 'bullet': bullet_char, + }) + + def parse_automatic_styles(content_tree): ''' Parse styles created automatically and populate the global styles @@ -870,7 +901,8 @@ def parse_automatic_styles(content_tree): # parse automatic list styles for elem in content_tree.findall('{%s}automatic-styles/{%s}list-style' % (OFFICE_NS, TEXT_NS)): style_name = 'LIST:%s' % elem.attrib.get('{%s}name' % STYLE_NS) - STYLES[style_name] = {'levels': {}} + if not style_name in STYLES: + STYLES[style_name] = {'levels': {}} for level in elem.findall('{%s}list-level-style-number' % TEXT_NS): num_level = level.attrib.get('{%s}level' % TEXT_NS) num_format = level.attrib.get('{%s}num-format' % STYLE_NS) @@ -887,7 +919,7 @@ def parse_automatic_styles(content_tree): }) -def convert_to_legi_xml(content, metadata = None): +def convert_to_legi_xml(content, metadata=None, styles=None): ''' Convert a content.xml/metadata.xml pair from an odt file to the legi XML format. @@ -907,6 +939,10 @@ def convert_to_legi_xml(content, metadata = None): metadata_element = ET.SubElement(legi, 'metadata') create_metadata(metadata_element, metadata_tree, content_tree) + if styles: + styles_tree = ET.ElementTree(ET.fromstring(styles)) + parse_static_styles(styles_tree) + current_top = [legi] current_legi = [] speech = None