refactor DepList.sort() to a standalone dependency_sort() function

Renamed the suds.xsd.deplist module to suds.xsd.depsort.
Removed the DepList class and replaced its main/only functionality with a single
dependency_sort() function taking a dependency dictionary and returning the same
list that used to be returned by the DepList.sort() method.

The returned list's contents match the items returned by the dependency
dictionary's items() method, but sorted so that dependencies come first.

Updated project release notes.

Additional included changes:
 * cleaned up the test_dependency_sort.py test module - cleanly separated
   basic tests, test utilities and test utility tests
 * added a test to make sure dependency_sort() does not modify its input data
 * documented that any entries found listed as dependencies, but that do not
   have their own dependencies listed as well, are logged & ignored
This commit is contained in:
Jurko Gospodnetić 2014-06-27 16:17:25 +02:00
parent 8086107bec
commit 085165042d
5 changed files with 257 additions and 144 deletions

View File

@ -166,6 +166,15 @@ version 0.7 (development)
``suds.sax.element.Element`` with a non-``Element`` parent.
* ``suds.xsd.sxbase.SchemaObject.content()`` now runs in linear instead of
quadratic time.
* ``DepList`` class replaced with a simple ``dependency_sort()`` function taking
a single dependency dictionary as input.
* The original implementation's interface was too heavy-weight with no added
value.
* Anything tried with the original interface outside the basic use-case
covered by ``dependency_sort()`` was actually or could be easily broken.
* ``suds.xsd.deplist`` module renamed to ``suds.xsd.depsort``.
* ``suds.cache`` module cleanup.
* Fixed ``FileCache`` default cache location related security issue. Many

View File

@ -1,83 +0,0 @@
# This program is free software; you can redistribute it and/or modify it under
# the terms of the (LGPL) GNU Lesser General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Library Lesser General Public License
# for more details at ( http://www.gnu.org/licenses/lgpl.html ).
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# written by: Jeff Ortel ( jortel@redhat.com )
"""
The I{deplist} module defines a class for performing topological dependency
sorting - dependency entries before those that depend on them.
"""
from suds import *
from logging import getLogger
log = getLogger(__name__)
class DepList:
    """
    Dependency solving list.

    Items are collected using add() and returned 'dependencies first' by
    sort().

    """

    def __init__(self):
        # Maps each added object to its dependency collection.
        self.__index = {}

    def add(self, *items):
        """
        Add items to be sorted.

        Items are tuples: (object, (deps,))

        Adding an object that has already been added replaces its previously
        stored dependencies.

        @param items: One or more items to be added.
        @type items: I{item}

        """
        self.__index.update(items)

    def sort(self):
        """
        Sort the list based on dependencies.

        If B is directly or indirectly dependent on A and they are not both a
        part of the same dependency cycle (i.e. A is neither directly nor
        indirectly dependent on B) then A needs to come before B.

        If A and B are a part of the same dependency cycle, i.e. if they are
        both directly or indirectly dependent on each other, then it does not
        matter which comes first.

        Result contains the same data objects (object + dependency collection)
        as given on input, but packaged in different items/tuples, i.e. the
        returned items will be 'equal' but not 'the same'.

        @return: The sorted items.
        @rtype: list

        """
        # Named 'result' so the builtin sorted() does not get shadowed.
        result = []
        processed = set()
        # items() is available on both Python 2 & 3 dictionaries, unlike the
        # Python 2 only iteritems().
        for key, deps in self.__index.items():
            self.__sort_r(result, processed, key, deps)
        return result

    def __sort_r(self, result, processed, key, deps):
        """
        Recursive topological sort implementation.

        Appends the (key, deps) item to the given result list, after first
        appending all of its not yet processed dependencies.

        """
        if key in processed:
            return
        # Marked as processed before descending into dependencies so that
        # dependency cycles terminate.
        processed.add(key)
        for dep_key in deps:
            dep_deps = self.__index.get(dep_key)
            if dep_deps is None:
                # Dependency without an entry of its own - nothing to order.
                log.debug('"%s" not found, skipped', Repr(dep_key))
                continue
            self.__sort_r(result, processed, dep_key, dep_deps)
        result.append((key, deps))

71
suds/xsd/depsort.py Normal file
View File

@ -0,0 +1,71 @@
# This program is free software; you can redistribute it and/or modify it under
# the terms of the (LGPL) GNU Lesser General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Library Lesser General Public License
# for more details at ( http://www.gnu.org/licenses/lgpl.html ).
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# written by: Jeff Ortel ( jortel@redhat.com )
"""
Dependency/topological sort implementation.
"""
from suds import *
from logging import getLogger
log = getLogger(__name__)
def dependency_sort(dependency_tree):
    """
    Sorts items 'dependencies first' in a given dependency tree.

    A dependency tree is a dictionary mapping an object to a collection of its
    dependency objects.

    Result is a properly sorted list of items, where each item is a 2-tuple
    containing an object and its dependency list, as given in the input
    dependency tree.

    If B is directly or indirectly dependent on A and they are not both a part
    of the same dependency cycle (i.e. A is neither directly nor indirectly
    dependent on B) then A needs to come before B.

    If A and B are a part of the same dependency cycle, i.e. if they are both
    directly or indirectly dependent on each other, then it does not matter
    which comes first.

    Any entries found listed as dependencies, but that do not have their own
    dependencies listed as well, are logged & ignored.

    @param dependency_tree: Maps each object to a collection of its dependency
        objects. It is only read, never modified.
    @type dependency_tree: dict
    @return: The sorted items.
    @rtype: list

    """
    # Named 'result' so the builtin sorted() does not get shadowed.
    result = []
    processed = set()
    # items() is available on both Python 2 & 3 dictionaries, unlike the
    # Python 2 only iteritems().
    for key, deps in dependency_tree.items():
        _sort_r(result, processed, key, deps, dependency_tree)
    return result


def _sort_r(result, processed, key, deps, dependency_tree):
    """
    Recursive topological sort implementation.

    Appends the (key, deps) item to the given result list, after first
    appending all of its not yet processed dependencies. Recursion depth grows
    with the length of the longest dependency chain.

    """
    if key in processed:
        return
    # Marked as processed before descending into dependencies so that
    # dependency cycles terminate.
    processed.add(key)
    for dep_key in deps:
        dep_deps = dependency_tree.get(dep_key)
        if dep_deps is None:
            # Dependency without an entry of its own - nothing to order.
            log.debug('"%s" not found, skipped', Repr(dep_key))
            continue
        _sort_r(result, processed, dep_key, dep_deps, dependency_tree)
    result.append((key, deps))

View File

@ -24,7 +24,7 @@ and targeted denormalization.
from suds import *
from suds.xsd import *
from suds.xsd.deplist import DepList
from suds.xsd.depsort import dependency_sort
from suds.xsd.sxbuiltin import *
from suds.xsd.sxbase import SchemaObject
from suds.xsd.sxbasic import Factory as BasicFactory
@ -330,14 +330,13 @@ class Schema(UnicodeMixin):
indexes = {}
for child in self.children:
child.content(all)
deplist = DepList()
dependencies = {}
for x in all:
x.qualify()
midx, deps = x.dependencies()
item = (x, tuple(deps))
deplist.add(item)
dependencies[x] = deps
indexes[x] = midx
for x, deps in deplist.sort():
for x, deps in dependency_sort(dependencies):
midx = indexes.get(x)
if midx is None:
continue

View File

@ -26,12 +26,13 @@ import testutils
if __name__ == "__main__":
testutils.run_using_pytest(globals())
from suds.xsd.deplist import DepList
from suds.xsd.depsort import dependency_sort
import pytest
from six import iteritems
import copy
# some of the tests in this module make sense only with assertions enabled
# (note though that pytest's assertion rewriting technique, as used by default
@ -46,28 +47,121 @@ else:
assertions_enabled = False
def test_dependency_sort():
    """DepList.sort() returns all the added items, dependencies first."""
    # Arrows point from an item to its dependency.
    #
    # f --+-----+-----+
    # |   |     |     |
    # |   v     v     v
    # |   e --> d --> c --> b
    # |   |           |     |
    # +---+-----------+-----+--> a --> x
    dependency_list = [
        ("c", ("a", "b")),
        ("e", ("d", "a")),
        ("d", ("c",)),
        ("b", ("a",)),
        ("f", ("e", "c", "d", "a")),
        ("a", ("x",)),
        ("x", ())]
    deplist = DepList()
    deplist.add(*dependency_list)
    result = deplist.sort()
    # same items as given on input, only possibly reordered
    assert sorted(result) == sorted(dependency_list)
    _assert_dependency_order((x[0] for x in result), dict(dependency_list))
# shared test data (arrows point from an item to its dependency)
# f --+-----+-----+
# |   |     |     |
# |   v     v     v
# |   e --> d --> c --> b
# |   |           |     |
# +---+-----------+-----+--> a --> x
_test_dependency_tree = {
"x": (),
"a": ("x",),
"b": ("a",),
"c": ("a", "b"),
"d": ("c",),
"e": ("d", "a"),
"f": ("e", "c", "d", "a")}
def test_dependency_sort():
    """Result holds exactly the input tree items, dependencies first."""
    tree = _test_dependency_tree
    sorted_items = dependency_sort(tree)
    # same items as in the input tree, only possibly reordered
    assert sorted(sorted_items) == sorted(iteritems(tree))
    ordered_keys = [item[0] for item in sorted_items]
    _assert_dependency_order(ordered_keys, tree)
def test_dependency_sort_does_not_mutate_input():
    """dependency_sort() must leave the dependency tree it is given as is."""
    tree = _test_dependency_tree

    # snapshot the tree: a copy of each dependency collection plus the
    # identity of the collection object stored for each key
    deps_snapshot = dict((k, copy.copy(v)) for k, v in iteritems(tree))
    deps_id_by_key_id = dict((id(k), id(v)) for k, v in iteritems(tree))

    dependency_sort(tree)

    # no keys added or removed
    assert len(tree) == len(deps_snapshot)
    for key, deps in iteritems(tree):
        # each key still refers to the very same dependency collection object
        assert id(deps) == deps_id_by_key_id[id(key)]
        # whose structure still matches the snapshot
        assert deps == deps_snapshot[key]
        # and whose members are still the same objects, in case the
        # collection's __eq__ is not precise enough
        _assert_same_content_set(deps, deps_snapshot[key])
###############################################################################
#
# Test utilities.
#
###############################################################################
def _assert_dependency_order(sequence, dependencies):
    """
    Assert that a sequence is ordered dependencies first.

    An earlier entry may depend on a later one only if the later one depends
    on it as well, i.e. if both are a part of the same dependency cycle.

    """
    ordered = list(sequence)
    closure = _transitive_dependency_closure(dependencies)
    for position, earlier in enumerate(ordered):
        for later in ordered[position + 1:]:
            earlier_needs_later = later in closure[earlier]
            later_needs_earlier = earlier in closure[later]
            assert not earlier_needs_later or later_needs_earlier
def _assert_same_content_set(lhs, rhs):
    """
    Assert that two iterables hold the same objects, regardless of order.

    Objects are matched by their id, and each object must occur the same
    number of times in both iterables.

    """
    assert _counter(lhs) == _counter(rhs)
def _counter(iterable):
"""Return an {id: count} dictionary for all items from `iterable`."""
counter = {}
for x in iterable:
counter[id(x)] = counter.setdefault(id(x), 0) + 1
return counter
def _transitive_dependency_closure(dependencies):
"""
Returns a transitive dependency closure.
If target A is dependent on target B, and target B is in turn dependent on
target C, then target A is also implicitly dependent on target C. A
transitive dependency closure is an expanded dependency collection so that
in it all such implicit dependencies have been explicitly specified.
"""
def clone(deps):
return dict((k, set(v)) for k, v in iteritems(deps))
closure = None
new = clone(dependencies)
while new != closure:
closure = clone(new)
for k, deps in iteritems(closure):
for dep in deps:
new[k] |= closure[dep]
return closure
###############################################################################
#
# Test utility tests.
#
###############################################################################
@pytest.mark.skipif(not assertions_enabled, reason="assertions disabled")
@pytest.mark.parametrize("sequence, dependencies", (
@ -106,40 +200,63 @@ def test_assert_dependency_order__valid(sequence, dependencies):
_assert_dependency_order(sequence, dependencies)
def _assert_dependency_order(sequence, dependencies):
    """
    Assert that a sequence is ordered dependencies first.

    An earlier entry may depend on a later one only if the later one depends
    on it as well, i.e. if both are a part of the same dependency cycle.

    """
    ordered = list(sequence)
    closure = _transitive_dependency_closure(dependencies)
    for position, earlier in enumerate(ordered):
        for later in ordered[position + 1:]:
            earlier_needs_later = later in closure[earlier]
            later_needs_earlier = earlier in closure[later]
            assert not earlier_needs_later or later_needs_earlier
@pytest.mark.skipif(not assertions_enabled, reason="assertions disabled")
@pytest.mark.parametrize("lhs, rhs", (
    # empty on one side only
    # ([1, 2.0, 6], [1, 2, 6]),
    ((), (1,)),
    ([2], []),
    ([], (4, 2)),
    ([], (x for x in [8, 4])),
    ((x for x in [1, 1]), []),
    # without duplicates
    ([1, 2, 3], [1, 2, 4]),
    ([1, 2, 3], [1, 2]),
    ([1, 2, 3], [1, 4]),
    ([0], [0.0]),
    ([0], [0.0]),
    # with duplicates
    ([1, 1], [1]),
    ((x for x in [1, 1]), [1]),
    ([1, 1], [1, 2, 1]),
    ([1, 1, 2, 2], [1, 2, 1]),
    # different object ids
    ([object()], [object()])))
def test_assert_same_content_set__invalid(lhs, rhs):
    """_assert_same_content_set() must fail for each of the given pairs."""
    # the utility reports a mismatch by raising an AssertionError
    pytest.raises(AssertionError, _assert_same_content_set, lhs, rhs)
def _transitive_dependency_closure(dependencies):
"""
Returns a transitive dependency closure.
@pytest.mark.parametrize("lhs, rhs", (
    # empty on both sides
    ((), ()),
    ([], []),
    ([], ()),
    ([], (x for x in [])),
    ((x for x in []), []),
    # matching without duplicates
    ([1, 2, 6], [1, 2, 6]),
    ([1, 2, 6], [6, 2, 1]),
    # matching with duplicates
    ([1, 2, 2, 6], [6, 2, 1, 2]),
    # matching object ids
    ([_assert_same_content_set], [_assert_same_content_set])))
def test_assert_same_content_set__valid(lhs, rhs):
    """_assert_same_content_set() must pass for each of the given pairs."""
    # passes as long as the call does not raise
    _assert_same_content_set(lhs, rhs)
If target A is dependent on target B, and target B is in turn dependent on
target C, then target A is also implicitly dependent on target C. A
transitive dependency closure is an expanded dependency collection so that
in it all such implicit dependencies have been explicitly specified.
"""
def clone(deps):
return dict((k, set(v)) for k, v in iteritems(deps))
closure = None
new = clone(dependencies)
while new != closure:
closure = clone(new)
for k, deps in iteritems(closure):
for dep in deps:
new[k] |= closure[dep]
return closure
def test_counter():
    """_counter() reports one count per distinct object, keyed by its id."""
    a = object()
    b = object()
    c = object()
    d = object()
    # Named 'items' so the builtin input() does not get shadowed.
    items = [a, b, b, c, c, d, a, a, a, d, b, b, b, b, b, a, d]
    result = _counter(items)
    # one entry per distinct object
    assert len(result) == 4
    assert result[id(a)] == items.count(a)
    assert result[id(b)] == items.count(b)
    assert result[id(c)] == items.count(c)
    assert result[id(d)] == items.count(d)
def test_counter__empty():
    """_counter() returns an empty dictionary when given no items."""
    result = _counter([])
    assert result == {}