debian-zeep/src/zeep/loader.py

117 lines
3.5 KiB
Python

import os.path
from defusedxml.lxml import fromstring
from lxml import etree
from six.moves.urllib.parse import urljoin, urlparse
from zeep.exceptions import XMLSyntaxError
class ImportResolver(etree.Resolver):
    """Custom lxml resolver that loads documents via the transport object.

    Only ``http`` and ``https`` URLs are handled here; for every other URL
    ``resolve`` returns ``None``.
    """

    def __init__(self, transport):
        # Transport instance used to fetch remote documents.
        self.transport = transport

    def resolve(self, url, pubid, context):
        """Resolve ``url`` through the transport when it is an HTTP(S) URL.

        :param url: The location lxml asks to be resolved
        :param pubid: The public id of the requested document (unused)
        :param context: The opaque lxml resolver context
        :returns: The result of ``resolve_string`` for the loaded content,
            or ``None`` when the URL scheme is not http/https.

        """
        scheme = urlparse(url).scheme
        if scheme not in ('http', 'https'):
            return None

        document = self.transport.load(url)
        return self.resolve_string(document, context)
def parse_xml(content, transport, base_url=None, strict=True,
              xml_huge_tree=False):
    """Parse an XML string and return the root Element.

    The parser strips comments, does not resolve entities, and loads
    imported documents through an ``ImportResolver`` bound to ``transport``.

    :param content: The XML string
    :type content: str
    :param transport: The transport instance to load imported documents
    :type transport: zeep.transports.Transport
    :param base_url: The base url of the document, used to make relative
      lookups absolute.
    :type base_url: str
    :param strict: boolean to indicate if the XML should be parsed 'strict'.
      If false then the recover mode is enabled which tries to parse invalid
      XML as best as it can.
    :type strict: boolean
    :param xml_huge_tree: boolean to indicate if lxml should process very
      large XML content.
    :type xml_huge_tree: boolean
    :returns: The document root
    :rtype: lxml.etree._Element
    :raises zeep.exceptions.XMLSyntaxError: when lxml reports a syntax error
      in ``content``.

    """
    xml_parser = etree.XMLParser(
        remove_comments=True,
        resolve_entities=False,
        recover=not strict,  # recover mode is the inverse of strict parsing
        huge_tree=xml_huge_tree)
    xml_parser.resolvers.add(ImportResolver(transport))

    try:
        root = fromstring(content, parser=xml_parser, base_url=base_url)
    except etree.XMLSyntaxError as exc:
        # Re-raise as the zeep exception, keeping the offending content.
        raise XMLSyntaxError(
            "Invalid XML content received (%s)" % exc.msg,
            content=content
        )
    return root
def load_external(url, transport, base_url=None, strict=True,
                  xml_huge_tree=False):
    """Load an external XML document and return its root element.

    :param url: Either a file-like object (anything with a ``read``
      attribute) or the location of the document. A location is made
      absolute against ``base_url`` when one is given.
    :param transport: The transport instance used to load the document and
      any documents it imports.
    :param base_url: The base url of the document, used to make relative
      lookups absolute.
    :param strict: boolean to indicate if the XML should be parsed 'strict'.
      If false then the recover mode is enabled which tries to parse invalid
      XML as best as it can.
    :type strict: boolean
    :param xml_huge_tree: boolean to indicate if lxml should process very
      large XML content. Forwarded to ``parse_xml``; defaults to False so
      existing callers keep the previous behaviour.
    :type xml_huge_tree: boolean
    :returns: The document root as returned by ``parse_xml``

    """
    if hasattr(url, 'read'):
        # File-like object: read its content directly, no transport needed.
        content = url.read()
    else:
        if base_url:
            url = absolute_location(url, base_url)
        content = transport.load(url)
    return parse_xml(content, transport, base_url, strict=strict,
                     xml_huge_tree=xml_huge_tree)
def absolute_location(location, base):
    """Make a location absolute (if it is relative) using the base location.

    A location which equals the base, or which already carries an http,
    https or file scheme, is returned unchanged. When the base has one of
    those schemes the location is joined onto it as a URL; otherwise both
    are treated as filesystem paths.

    :param location: The (relative) url
    :type location: str
    :param base: The base location
    :type base: str
    :returns: An absolute URL
    :rtype: str

    """
    url_schemes = ('http', 'https', 'file')

    if location == base or urlparse(location).scheme in url_schemes:
        return location

    if base and urlparse(base).scheme in url_schemes:
        return urljoin(base, location)

    # Filesystem handling: absolute paths, and relative paths without a
    # base to resolve against, are returned as they are.
    if os.path.isabs(location) or not base:
        return location

    base_dir = os.path.dirname(base)
    return os.path.realpath(os.path.join(base_dir, location))
def is_relative_path(value):
    """Check if the given value is a relative path.

    A value with an http, https or file scheme, or one which is an absolute
    filesystem path, is not relative.

    :param value: The value
    :type value: str
    :returns: Boolean indicating if the url is relative. If it is absolute then
      False is returned.
    :rtype: boolean

    """
    has_url_scheme = urlparse(value).scheme in ('http', 'https', 'file')
    return not (has_url_scheme or os.path.isabs(value))