From 526e9baa9ffb9d70f8eadbf166127fcbd5966d85 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Sat, 5 Oct 2019 20:47:32 +0200 Subject: [PATCH] Add schema matching by path --- elementpath/__init__.py | 2 +- elementpath/schema_proxy.py | 90 ++++-------------------------------- elementpath/xpath1_parser.py | 74 +++++++++++++++++++++++------ elementpath/xpath_context.py | 6 +-- elementpath/xpath_token.py | 5 +- tests/test_schema_proxy.py | 11 +++++ tests/test_xpath2_parser.py | 3 +- tests/test_xpath_context.py | 11 ++++- 8 files changed, 96 insertions(+), 106 deletions(-) diff --git a/elementpath/__init__.py b/elementpath/__init__.py index b690657..e217285 100644 --- a/elementpath/__init__.py +++ b/elementpath/__init__.py @@ -28,4 +28,4 @@ from .xpath_token import XPathToken from .xpath1_parser import XPath1Parser from .xpath2_constructors import XPath2Parser from .xpath_selectors import select, iter_select, Selector -from .schema_proxy import AbstractSchemaProxy, XMLSchemaProxy +from .schema_proxy import AbstractSchemaProxy diff --git a/elementpath/schema_proxy.py b/elementpath/schema_proxy.py index 96ea4a9..6be48ab 100644 --- a/elementpath/schema_proxy.py +++ b/elementpath/schema_proxy.py @@ -10,8 +10,7 @@ # from abc import ABCMeta, abstractmethod from .compat import add_metaclass -from .exceptions import ElementPathTypeError, ElementPathValueError -from .namespaces import XSD_NAMESPACE +from .exceptions import ElementPathTypeError from .xpath_nodes import is_etree_element from .xpath_context import XPathSchemaContext @@ -186,6 +185,12 @@ class AbstractSchemaProxy(object): :returns: an object that represents an XSD element or `None`. """ + # TODO: can make this as @abstractmethod from release v1.3.1 + def find(self, path, namespaces=None): + """ + Find the schema component using an XPath expression. + """ + @abstractmethod def get_substitution_group(self, qname): """ @@ -234,82 +239,5 @@ class AbstractSchemaProxy(object): """ -class XMLSchemaProxy(AbstractSchemaProxy): - """ - Schema proxy for the *xmlschema* library. It will be removed soon because - xmlschema v1.0.14 will includes an its own version of schema proxy that - uses a custom context implementation that recognizes circular references. - """ - def __init__(self, schema=None, base_element=None): - if schema is None: - from xmlschema import XMLSchema - schema = XMLSchema.meta_schema - super(XMLSchemaProxy, self).__init__(schema, base_element) - - if base_element is not None: - try: - if base_element.schema is not schema: - raise ElementPathValueError("%r is not an element of %r" % (base_element, schema)) - except AttributeError: - raise ElementPathTypeError("%r is not an XsdElement" % base_element) - - def get_type(self, qname): - try: - return self._schema.maps.types[qname] - except KeyError: - return None - - def get_attribute(self, qname): - try: - return self._schema.maps.attributes[qname] - except KeyError: - return None - - def get_element(self, qname): - try: - return self._schema.maps.elements[qname] - except KeyError: - return None - - def get_substitution_group(self, qname): - try: - return self._schema.maps.substitution_groups[qname] - except KeyError: - return None - - def is_instance(self, obj, type_qname): - xsd_type = self._schema.maps.types[type_qname] - try: - xsd_type.encode(obj) - except ValueError: - return False - else: - return True - - def cast_as(self, obj, type_qname): - xsd_type = self._schema.maps.types[type_qname] - return xsd_type.decode(obj) - - def iter_atomic_types(self): - for xsd_type in self._schema.maps.types.values(): - if xsd_type.target_namespace != XSD_NAMESPACE and hasattr(xsd_type, 'primitive_type'): - yield xsd_type - - def get_primitive_type(self, xsd_type): - if not xsd_type.is_simple(): - if not xsd_type.has_simple_content(): - return self._schema.maps.types['{%s}anyType' % XSD_NAMESPACE] - xsd_type = xsd_type.content_type - - if not hasattr(xsd_type, 'primitive_type'): - if xsd_type.base_type is None: - return xsd_type - return self.get_primitive_type(xsd_type.base_type) - elif xsd_type.primitive_type is not xsd_type: - return self.get_primitive_type(xsd_type.primitive_type) - else: - return xsd_type - - -__all__ = ['AbstractXsdComponent', 'AbstractEtreeElement', 'AbstractXsdType', 'AbstractXsdAttribute', - 'AbstractXsdElement', 'AbstractSchemaProxy', 'XMLSchemaProxy'] +__all__ = ['AbstractXsdComponent', 'AbstractEtreeElement', 'AbstractXsdType', + 'AbstractXsdAttribute', 'AbstractXsdElement', 'AbstractSchemaProxy'] diff --git a/elementpath/xpath1_parser.py b/elementpath/xpath1_parser.py index ed55172..6c58333 100644 --- a/elementpath/xpath1_parser.py +++ b/elementpath/xpath1_parser.py @@ -20,6 +20,7 @@ from .xpath_context import XPathSchemaContext from .tdop_parser import Parser, MultiLabel from .namespaces import XML_ID, XML_LANG, XPATH_1_DEFAULT_NAMESPACES, \ XPATH_FUNCTIONS_NAMESPACE, XSD_NAMESPACE, qname_to_prefixed +from .schema_proxy import AbstractSchemaProxy from .xpath_token import XPathToken from .xpath_nodes import AttributeNode, NamespaceNode, TypedAttribute, TypedElement,\ is_etree_element, is_xpath_node, is_element_node, is_document_node, is_attribute_node, \ @@ -246,13 +247,11 @@ def select(self, context=None): return name = self.value - if name[0] != '{' and self.parser.default_namespace: - tag = u'{%s}%s' % (self.parser.default_namespace, name) - else: - tag = name - if isinstance(context, XPathSchemaContext): - # Bind with the XSD type + # Bind with the XSD type from a schema + if name[0] != '{' and self.parser.default_namespace: + name = '{%s}%s' % (self.parser.default_namespace, name) + for item in context.iter_children_or_self(): xsd_type = self.match_xsd_type(item, name) if xsd_type is not None: @@ -266,14 +265,60 @@ def select(self, context=None): yield TypedAttribute(item, value) else: yield TypedElement(item, value) + return + + if name[0] != '{' and self.parser.default_namespace: + tag = '{%s}%s' % (self.parser.default_namespace, name) + else: + tag = name + + # Checks if the token is bound to an XSD type. If not try a match using + # the element path. If this match fails the xsd_type attribute is set + # with the schema object to prevent other checks until the schema change. + if self.xsd_type is self.parser.schema: - elif self.xsd_type is None: # Untyped selection for item in context.iter_children_or_self(): if is_attribute_node(item, name): yield item elif is_element_node(item, tag): yield item + + elif self.xsd_type is None or isinstance(self.xsd_type, AbstractSchemaProxy): + + # Try to match the type using the path + for item in context.iter_children_or_self(): + try: + if is_attribute_node(item, name): + path = context.get_path(item) + xsd_attribute = self.parser.schema.find(path, self.parser.namespaces) + + if xsd_attribute is not None: + self.xsd_type = xsd_attribute.type + yield TypedAttribute(item, self.xsd_type.decode(item[1])) + else: + self.xsd_type = self.parser.schema + yield item + elif is_element_node(item, tag): + path = context.get_path(item) + xsd_element = self.parser.schema.find(path, self.parser.namespaces) + + if xsd_element is not None: + self.xsd_type = xsd_element.type + if isinstance(item, TypedElement): + yield item + elif self.xsd_type.is_simple() or self.xsd_type.has_simple_content(): + yield TypedElement(item, self.xsd_type.decode(item.text)) + else: + yield item + else: + self.xsd_type = self.parser.schema + yield item + + except (TypeError, ValueError): + msg = "Type {!r} does not match sequence type of {!r}" + self.wrong_sequence_type(msg.format(self.xsd_type, item)) + else: # XSD typed selection for item in context.iter_children_or_self(): @@ -369,7 +414,7 @@ def select(self, context=None): if context is not None: for item in context.iter_children_or_self(): if is_attribute_node(item, value): - yield item[1] + yield item elif is_element_node(item, value): yield item @@ -491,12 +536,16 @@ def select(self, context=None): # Logical Operators @method(infix('or', bp=20)) def evaluate(self, context=None): - return bool(self[0].evaluate(context) or self[1].evaluate(context)) + if context is None: + return bool(self[0].evaluate() or self[1].evaluate()) + return bool(self[0].evaluate(context.copy()) or self[1].evaluate(context.copy())) @method(infix('and', bp=25)) def evaluate(self, context=None): - return bool(self[0].evaluate(context) and self[1].evaluate(context)) + if context is None: + return bool(self[0].evaluate() and self[1].evaluate()) + return bool(self[0].evaluate(context.copy()) and self[1].evaluate(context.copy())) @method(infix('=', bp=30)) @@ -748,10 +797,7 @@ def led(self, left): @method('[') def select(self, context=None): - if isinstance(context, XPathSchemaContext): - for item in self[0].select(context): - yield item - elif context is not None: + if context is not None: for position, item in enumerate(self[0].select(context), start=1): predicate = list(self[1].select(context.copy())) if len(predicate) == 1 and isinstance(predicate[0], NumericTypeProxy): diff --git a/elementpath/xpath_context.py b/elementpath/xpath_context.py index 6f451fe..60bb649 100644 --- a/elementpath/xpath_context.py +++ b/elementpath/xpath_context.py @@ -105,7 +105,7 @@ class XPathContext(object): @lru_cache(maxsize=1024) def get_path(self, item): - """Cached path resolver for elements and attributes.""" + """Cached path resolver for elements and attributes. Returns absolute paths.""" path = [] if isinstance(item, (AttributeNode, TypedAttribute)): @@ -116,9 +116,9 @@ class XPathContext(object): while True: parent = self.get_parent(item) - if parent is None: - return '/'.join(reversed(path)) path.append(item.tag) + if parent is None: + return '/{}'.format('/'.join(reversed(path))) item = parent def is_principal_node_kind(self): diff --git a/elementpath/xpath_token.py b/elementpath/xpath_token.py index 284f2cb..a819491 100644 --- a/elementpath/xpath_token.py +++ b/elementpath/xpath_token.py @@ -381,12 +381,9 @@ class XPathToken(Token): schema type an exception is raised. :param schema_item: an XPath item related with a schema instance. - :param name: a not empty string. + :param name: a QName in extended format for matching the item. :returns: the matched XSD type or `None` if there isn't a match. """ - if name[0] != '{' and self.parser.default_namespace: - name = '{%s}%s' % (self.parser.default_namespace, name) - if isinstance(schema_item, AttributeNode): if not schema_item[1].is_matching(name): return diff --git a/tests/test_schema_proxy.py b/tests/test_schema_proxy.py index bcd1d07..300f984 100644 --- a/tests/test_schema_proxy.py +++ b/tests/test_schema_proxy.py @@ -121,6 +121,17 @@ class XPath2ParserXMLSchemaTest(test_xpath2_parser.XPath2ParserTest): any_simple_type = schema_proxy.get_type('{%s}anySimpleType' % XSD_NAMESPACE) self.assertEqual(schema_proxy.get_primitive_type(any_simple_type), any_simple_type) + def test_find_api(self): + schema_src = """ + + """ + schema = xmlschema.XMLSchema(schema_src) + schema_proxy = XMLSchemaProxy(schema=schema) + if xmlschema.__version__ == '1.0.14': + self.assertIsNone(schema_proxy.find('/test_element')) # Not implemented! + else: + self.assertEqual(schema_proxy.find('/test_element'), schema.elements['test_element']) + def test_is_instance_api(self): self.assertFalse(self.schema_proxy.is_instance(True, '{%s}integer' % XSD_NAMESPACE)) self.assertTrue(self.schema_proxy.is_instance(5, '{%s}integer' % XSD_NAMESPACE)) diff --git a/tests/test_xpath2_parser.py b/tests/test_xpath2_parser.py index 28522c4..6917c6c 100644 --- a/tests/test_xpath2_parser.py +++ b/tests/test_xpath2_parser.py @@ -661,7 +661,8 @@ class XPath2ParserTest(test_xpath1_parser.XPath1ParserTest): if self.etree is lxml_etree: prefixes = {'p0', 'p1'} else: - prefixes = {'p0', 'p2', 'fn', 'xlink', 'err'} | {x for x in self.etree._namespace_map.values()} + prefixes = {'p0', 'p2', 'fn', 'xlink', 'err', 'vc', 'xslt', '', 'hfp'} + prefixes |= {x for x in self.etree._namespace_map.values()} self.check_selector("fn:in-scope-prefixes(.)", root, prefixes, namespaces={'p0': 'ns0', 'p2': 'ns2'}) def test_string_constructors(self): diff --git a/tests/test_xpath_context.py b/tests/test_xpath_context.py index 045e2cf..74d37de 100644 --- a/tests/test_xpath_context.py +++ b/tests/test_xpath_context.py @@ -53,8 +53,15 @@ class XPathContextTest(unittest.TestCase): context = XPathContext(root) - self.assertEqual(context.get_path(root), '') - self.assertEqual(context.get_path(root[0]), 'B1') + self.assertEqual(context.get_path(root), '/A') + self.assertEqual(context.get_path(root[0]), '/A/B1') + self.assertEqual(context.get_path(root[0][0]), '/A/B1/C1') + self.assertEqual(context.get_path(root[1]), '/A/B2') + self.assertEqual(context.get_path(root[2]), '/A/B3') + self.assertEqual(context.get_path(root[2][0]), '/A/B3/C1') + self.assertEqual(context.get_path(root[2][1]), '/A/B3/C2') + context._elem = root[2][1] + self.assertEqual(context.get_path(AttributeNode('max', '10')), '/A/B3/C2/@max') def test_iter_attributes(self): root = ElementTree.XML('')