add xml utility functions (#26333)

This commit is contained in:
Benjamin Dauvergne 2018-09-18 15:15:23 +02:00
parent 69c59729e1
commit 03f41cf340
2 changed files with 116 additions and 0 deletions

83
passerelle/utils/xml.py Normal file
View File

@ -0,0 +1,83 @@
# passerelle - uniform access to multiple data sources and services
# Copyright (C) 2018 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
def text_content(node):
    '''Extract text content from node and all its children. Equivalent to
    xmlNodeGetContent from libxml.

    The text of node, then for each descendant (in document order) its text,
    the content of its own descendants and finally its tail are concatenated.
    The tail of node itself is not part of the result.

    Return an empty string if node is None.'''
    if node is None:
        return u''

    parts = []
    if node.text:
        parts.append(node.text)

    # Walk the tree with an explicit stack instead of recursing, so that
    # very deep documents cannot exhaust the interpreter recursion limit
    # and no intermediate list is rebuilt at every nesting level.
    # Each entry is (element, iterator over its remaining children).
    stack = [(node, iter(node))]
    while stack:
        current, children = stack[-1]
        child = next(children, None)
        if child is None:
            # All children of current were visited; its tail comes right
            # after its content, except for the starting node whose tail
            # lies outside of the extracted subtree.
            stack.pop()
            if stack and current.tail:
                parts.append(current.tail)
            continue
        if child.text:
            parts.append(child.text)
        stack.append((child, iter(child)))

    return u''.join(parts)
def to_json(root):
    '''Convert an XML document (a rooted tree) into a dictionary compatible
    with JSON serialization, following these rules:

    - root is converted into a dictionary, its children's node names are the
      keys,
    - all child nodes without children are considered to be only text and
      converted to a JSON string; those with no text are left out,
    - all child nodes with children are converted to an array, each of their
      children being the root of a new conversion from XML to JSON; children
      whose conversion is empty are left out of the array.

    When a tag is repeated under root, arrays coming from the same tag are
    merged and a text value replaces whatever was stored for that tag. If a
    non-empty row has to be stored under a tag currently holding text, the
    text is replaced by a new array.

    Ex.:

        <root>
            <child1>wtv</child1>
            <rows>
                <row>
                    <child2>2</child2>
                </row>
                <row>
                    <child3>3</child3>
                </row>
            </rows>
        </root>

    is converted to:

        {
            "child1": "wtv",
            "rows": [
                {"child2": "2"},
                {"child3": "3"}
            ]
        }'''
    d = {}
    for child in root:
        tag = child.tag
        if not len(child):  # text node
            value = text_content(child)
            if value:
                d[tag] = value
            continue

        rows = d.setdefault(tag, [])
        for row in child:
            row_content = to_json(row)
            if not row_content:
                continue
            if not isinstance(rows, list):
                # An earlier sibling with the same tag was a text node, so
                # setdefault() handed back its string; start an array
                # instead of failing on str.append().
                rows = d[tag] = []
            rows.append(row_content)
    return d

33
tests/test_utils_xml.py Normal file
View File

@ -0,0 +1,33 @@
import xml.etree.ElementTree as ET
from passerelle.utils.xml import to_json, text_content
def test_text_content():
    '''Text and tails of a small mixed-content document are concatenated in
    document order.'''
    document = ET.fromstring('<root>aa<b>bb</b>cc</root>')
    extracted = text_content(document)
    assert extracted == 'aabbcc'
def test_to_json():
    '''Leaf nodes become strings, nodes with children become arrays of
    converted rows, and empty nodes are left out of the result.'''
    source = '''<root>
<text1>1</text1>
<text2>2</text2>
<enfants>
<enfant>
<text3>3</text3>
</enfant>
<enfant>
<text3>4</text3>
</enfant>
<zob/>
</enfants>
<zob/>
</root>'''
    expected = {
        'text1': '1',
        'text2': '2',
        'enfants': [
            {'text3': '3'},
            {'text3': '4'},
        ]
    }
    converted = to_json(ET.fromstring(source))
    assert converted == expected