add xml utility functions (#26333)
This commit is contained in:
parent
69c59729e1
commit
03f41cf340
|
@ -0,0 +1,83 @@
|
|||
# passerelle - uniform access to multiple data sources and services
|
||||
# Copyright (C) 2018 Entr'ouvert
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify it
|
||||
# under the terms of the GNU Affero General Public License as published
|
||||
# by the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
def text_content(node):
    '''Return the concatenated text of node and of all its descendants.

    Equivalent to xmlNodeGetContent from libxml: the text of every element of
    the subtree is joined in document order, including the tail text that
    follows each descendant. The tail of node itself is not included.

    node is an ElementTree-style element (iterable over its children, with
    text and tail attributes) or None; None yields an empty string.

    The subtree is walked with an explicit stack instead of recursion, so a
    very deeply nested document cannot exhaust the interpreter recursion
    limit.
    '''
    if node is None:
        return ''

    parts = []
    # Each entry is either (element, None) for a subtree still to visit, or
    # (None, text) for a tail string waiting to be emitted.
    pending = [(node, None)]
    while pending:
        element, tail = pending.pop()
        if element is None:
            parts.append(tail)
            continue
        if element.text:
            parts.append(element.text)
        # Push children right-to-left, each tail underneath its element, so
        # that popping yields: first child, its whole subtree, its tail, then
        # the next child, and so on.
        for child in reversed(list(element)):
            if child.tail:
                pending.append((None, child.tail))
            pending.append((child, None))
    return u''.join(parts)
|
||||
|
||||
|
||||
def to_json(root):
    '''Convert an XML document (a rooted tree) into a dictionary compatible
    with JSON serialization, following these rules:

    - root is converted into a dictionary, its children's node names are the
      keys,
    - all child nodes without children are considered to be only text and
      converted to a JSON string; a child whose text is empty is omitted,
    - all child nodes with children are converted to an array, each of their
      children being the root of a new conversion from XML to JSON; children
      converting to an empty dictionary are skipped, and the tag name of
      those children is not kept.

    When a tag is repeated under the same parent, a later text node replaces
    the previous value while nodes with children accumulate their rows in the
    same array. XML attributes are not read.

    Ex.:

        <root>
            <child1>wtv</child1>
            <rows>
                <row>
                    <child2>2</child2>
                </row>
                <row>
                    <child3>3</child3>
                </row>
            </rows>
        </root>

    is converted to:

        {
            "child1": "wtv",
            "rows": [
                {"child2": "2"},
                {"child3": "3"}
            ]
        }'''

    d = {}
    for child in root:
        tag = child.tag
        if not len(child):  # text node
            value = text_content(child)
            if value:
                d[tag] = value
            continue

        rows = d.get(tag)
        if not isinstance(rows, list):
            # Either the tag is new, or an earlier sibling with the same tag
            # was a text node: start a fresh array instead of calling
            # .append() on a string.
            rows = d[tag] = []
        for row in child:
            child_content = to_json(row)
            if child_content:
                rows.append(child_content)
    return d
|
|
@ -0,0 +1,33 @@
|
|||
import xml.etree.ElementTree as ET
|
||||
|
||||
from passerelle.utils.xml import to_json, text_content
|
||||
|
||||
|
||||
def test_text_content():
    '''Text, nested element text and tail text are joined in document order.'''
    document = ET.fromstring('<root>aa<b>bb</b>cc</root>')
    extracted = text_content(document)
    assert extracted == 'aabbcc'
|
||||
|
||||
|
||||
def test_to_json():
    '''Leaves become strings, containers become arrays of converted rows and
    empty elements are dropped at every level.'''
    document = ET.fromstring('''<root>
    <text1>1</text1>
    <text2>2</text2>
    <enfants>
        <enfant>
            <text3>3</text3>
        </enfant>
        <enfant>
            <text3>4</text3>
        </enfant>
        <zob/>
    </enfants>
    <zob/>
</root>''')
    expected = {
        'text1': '1',
        'text2': '2',
        'enfants': [
            {'text3': '3'},
            {'text3': '4'},
        ]
    }
    converted = to_json(document)
    assert converted == expected
|
Loading…
Reference in New Issue