Add LRU cached resolvers to context class

- Add get_parent() and get_path() to XPathContext
This commit is contained in:
Davide Brunato 2019-10-04 20:44:39 +02:00
parent de6955b36c
commit 04f63f6b4d
4 changed files with 98 additions and 50 deletions

View File

@ -20,6 +20,7 @@ if PY3:
unicode_type = str
unicode_chr = chr
from collections.abc import MutableSequence
from functools import lru_cache
else:
# noinspection PyCompatibility
from urllib2 import URLError, quote as urllib_quote
@ -28,6 +29,18 @@ else:
unicode_type = unicode
unicode_chr = unichr
from collections import MutableSequence
from functools import wraps
def lru_cache(maxsize=128, typed=False):
    """
    A fake lru_cache decorator function for Python 2.7 compatibility until support ends.

    The arguments mirror those of :func:`functools.lru_cache` but are ignored:
    the decorated function is invoked on every call and nothing is memoized.
    The returned wrapper still exposes ``__wrapped__`` and a no-op
    ``cache_clear()``, so code written against the real decorator does not
    fail with an AttributeError when it runs on Python 2.7.

    :param maxsize: ignored, kept only for signature compatibility.
    :param typed: ignored, kept only for signature compatibility.
    :return: a decorator that wraps a callable in a pass-through function.
    """
    def lru_cache_decorator(f):
        @wraps(f)
        def wrapper(*args, **kwargs):
            return f(*args, **kwargs)

        def cache_clear():
            """Nothing is cached by the fake decorator, so nothing has to be cleared."""

        # functools.wraps() of Python 2.7 does not set __wrapped__ by itself.
        wrapper.__wrapped__ = f
        wrapper.cache_clear = cache_clear
        return wrapper

    return lru_cache_decorator
def add_metaclass(metaclass):

View File

@ -481,14 +481,10 @@ def select(self, context=None):
if context is None:
self.missing_context()
else:
try:
parent = context.parent_map[context.item]
except KeyError:
pass
else:
if is_element_node(parent):
context.item = parent
yield parent
parent = context.get_parent(context.item)
if is_element_node(parent):
context.item = parent
yield parent
###

View File

@ -10,6 +10,7 @@
#
import datetime
from .compat import lru_cache
from .exceptions import ElementPathTypeError
from .xpath_nodes import AttributeNode, TypedAttribute, TypedElement, is_etree_element, \
is_element_node, is_document_node, is_attribute_node
@ -41,13 +42,12 @@ class XPathContext(object):
raise ElementPathTypeError(
"invalid argument root={!r}, an Element is required.".format(root)
)
self._root = root
self.root = root
if item is not None:
self.item = item
elif is_element_node(root):
self.item = root
else:
self.item = None
self.item = root if hasattr(root, 'tag') else None
self.position = position
self.size = size
@ -55,16 +55,17 @@ class XPathContext(object):
self.variables = {} if variables is None else dict(variables)
self.current_dt = current_dt or datetime.datetime.now()
self.timezone = timezone
self._elem = item if is_element_node(item) else root
self._parent_map = None
def __repr__(self):
return '%s(root=%r, item=%r, position=%r, size=%r, axis=%r)' % (
self.__class__.__name__, self._root, self.item, self.position, self.size, self.axis
self.__class__.__name__, self.root, self.item, self.position, self.size, self.axis
)
def copy(self, clear_axis=True):
obj = type(self)(
root=self._root,
root=self.root,
item=self.item,
position=self.position,
size=self.size,
@ -73,20 +74,53 @@ class XPathContext(object):
current_dt=self.current_dt,
timezone=self.timezone,
)
obj._elem = self._elem
obj._parent_map = self._parent_map
return obj
@property
def root(self):
return self._root
@property
def parent_map(self):
# TODO: try to implement a dynamic parent map to save memory ...
if self._parent_map is None:
self._parent_map = {child: elem for elem in self._root.iter() for child in elem}
self._parent_map = {child: elem for elem in self.root.iter() for child in elem}
return self._parent_map
def get_parent(self, elem):
    """
    Returns the parent element or `None` for root element and for elements
    that are not included in the tree.

    Lookups are served by the parent map stored on the context. On a miss
    the map is rebuilt once from the current tree and the lookup is retried,
    so elements attached after the last build (e.g. by an incremental
    parser) are still resolved.

    The method is deliberately not wrapped with an LRU cache: a memoized
    `None` for a not-yet-attached element would never be corrected, and a
    cache bound to the method keeps strong references to every context and
    element used as a key.

    :param elem: the node whose parent is requested.
    :return: the parent element, or `None` if *elem* is the root, is not \
    part of the tree or is not usable as a mapping key.
    """
    if elem is self.root:
        return None

    parent_map = self._parent_map
    if parent_map is not None:
        try:
            return parent_map[elem]
        except (KeyError, TypeError):
            # Unknown (or unhashable) item: the map may be stale, rebuild it.
            pass

    self._parent_map = parent_map = {
        child: parent for parent in self.root.iter() for child in parent
    }
    try:
        return parent_map[elem]
    except (KeyError, TypeError):
        return None
def get_path(self, item):
    """
    Path resolver for elements and attributes.

    Builds the path by walking up the tree with `get_parent()` and joining
    the collected tags with '/'. The tag of the root element is not
    included, so the path of the root itself is the empty string. For an
    attribute node the path ends with an '@name' step and the walk starts
    from the element currently stored in `self._elem`.

    The result is not memoized: for attribute nodes it depends on
    `self._elem`, which changes during iteration, so a cache keyed only on
    the item would return the path computed for another element that
    carries an equal attribute.

    :param item: an element, a typed element or an attribute node.
    :return: a string with the path steps joined by '/'.
    """
    path = []
    if isinstance(item, (AttributeNode, TypedAttribute)):
        # NOTE(review): assumes item[0] is the attribute name for both
        # attribute node types -- confirm for TypedAttribute.
        path.append('@%s' % item[0])
        item = self._elem

    # Checked after the attribute branch too, because self._elem can hold
    # a typed element whose wrapped element is the key of the parent map.
    if isinstance(item, TypedElement):
        item = item[0]

    parent = self.get_parent(item)
    while parent is not None:
        path.append(item.tag)
        item, parent = parent, self.get_parent(parent)
    return '/'.join(reversed(path))
def is_principal_node_kind(self):
if self.axis == 'attribute':
return is_attribute_node(self.item)
@ -103,6 +137,7 @@ class XPathContext(object):
def iter_attributes(self):
if not is_element_node(self.item):
return
self._elem = self.item
status = self.item, self.size, self.position, self.axis
self.axis = 'attribute'
@ -110,20 +145,19 @@ class XPathContext(object):
if isinstance(self.item, TypedElement):
self.item = self.item.elem
for item in self.item.attrib.items():
self.item = AttributeNode(*item)
for self.item in map(lambda x: AttributeNode(*x), self.item.attrib.items()):
yield self.item
self.item, self.size, self.position, self.axis = status
def iter_children_or_self(self, item=None, child_axis=False):
status = self.item, self.size, self.position, self.axis
if not child_axis and self.axis is not None:
yield self.item
self.item, self.size, self.position, self.axis = status
return
status = self.item, self.size, self.position, self.axis
self.axis = 'child'
if item is not None:
self.item = item[0] if isinstance(item, TypedElement) else item
elif isinstance(self.item, TypedElement):
@ -131,7 +165,7 @@ class XPathContext(object):
if self.item is None:
self.size, self.position = 1, 0
self.item = self._root.getroot() if is_document_node(self._root) else self._root
self.item = self.root.getroot() if is_document_node(self.root) else self.root
yield self.item
elif is_etree_element(self.item):
elem = self.item
@ -145,18 +179,17 @@ class XPathContext(object):
self.item, self.size, self.position, self.axis = status
def iter_preceding(self):
status = self.item, self.size, self.position, self.axis
item = e = self.item[0] if isinstance(self.item, TypedElement) else self.item
if not is_etree_element(item):
return
status = self.item, self.size, self.position, self.axis
self.axis = 'preceding'
ancestors = []
while True:
try:
parent = self.parent_map[e]
except KeyError:
parent = self.get_parent(e)
if parent is None:
break
else:
ancestors.append(parent)
@ -172,20 +205,19 @@ class XPathContext(object):
self.item, self.size, self.position, self.axis = status
def iter_parent(self, axis=None):
status = self.item, self.size, self.position, self.axis
self.axis = axis
try:
if isinstance(self.item, TypedElement):
self.item = self.parent_map[self.item[0]]
else:
self.item = self.parent_map[self.item]
except KeyError:
pass
if isinstance(self.item, TypedElement):
parent = self.get_parent(self.item[0])
else:
parent = self.get_parent(self.item)
if parent is not None:
status = self.item, self.size, self.position, self.axis
self.axis = axis
self.item = parent
yield self.item
self.item, self.size, self.position, self.axis = status
self.item, self.size, self.position, self.axis = status
def iter_descendants(self, item=None, axis=None):
status = self.item, self.size, self.position, self.axis
@ -198,8 +230,8 @@ class XPathContext(object):
if self.item is None:
self.size, self.position = 1, 0
yield self._root
self.item = self._root.getroot() if is_document_node(self._root) else self._root
yield self.root
self.item = self.root.getroot() if is_document_node(self.root) else self.root
elif not is_element_node(self.item):
return
@ -209,7 +241,7 @@ class XPathContext(object):
self.item, self.size, self.position, self.axis = status
def _iter_descendants(self):
elem = self.item
elem = self._elem = self.item
yield elem
if elem.text is not None:
self.item = elem.text
@ -230,9 +262,8 @@ class XPathContext(object):
self.item = self.item[0]
while True:
try:
parent = self.parent_map[self.item]
except KeyError:
parent = self.get_parent(self.item)
if parent is None:
break
else:
self.item = parent
@ -246,8 +277,8 @@ class XPathContext(object):
if self.item is None:
self.size, self.position = 1, 0
yield self._root
self.item = self._root.getroot() if is_document_node(self._root) else self._root
yield self.root
self.item = self.root.getroot() if is_document_node(self.root) else self.root
elif isinstance(self.item, TypedElement):
self.item = self.item[0]
elif not is_etree_element(self.item):
@ -280,7 +311,7 @@ class XPathContext(object):
self.item, self.size, self.position = status
def _iter_context(self):
elem = self.item
elem = self._elem = self.item
yield elem
if elem.text is not None:
self.item = elem.text

View File

@ -48,6 +48,14 @@ class XPathContextTest(unittest.TestCase):
root[2]: root, root[2][0]: root[2], root[2][1]: root[2]
})
def test_path(self):
    """Checks that get_path() returns the '/'-joined tag path below the root."""
    root = ElementTree.XML('<A><B1><C1/></B1><B2/><B3><C1/><C2 max="10"/></B3></A>')
    context = XPathContext(root)

    # The tag of the root is not part of the path, so the root maps to ''.
    self.assertEqual(context.get_path(root), '')
    self.assertEqual(context.get_path(root[0]), 'B1')
    self.assertEqual(context.get_path(root[1]), 'B2')

    # Nested elements exercise the ordering of the joined steps.
    self.assertEqual(context.get_path(root[0][0]), 'B1/C1')
    self.assertEqual(context.get_path(root[2][0]), 'B3/C1')
    self.assertEqual(context.get_path(root[2][1]), 'B3/C2')
def test_iter_attributes(self):
root = ElementTree.XML('<A a1="10" a2="20"/>')
context = XPathContext(root)