refactor DepList.sort() to a standalone dependency_sort() function

Renamed the suds.xsd.deplist module to suds.xsd.depsort. Removed the DepList class and replaced its main/only functionality with a single dependency_sort() function taking a dependency dictionary and returning the same list that used to be returned by the DepList.sort() method. The returned list's contents match the items returned by the dependency dictionary's items() method, but sorted so that dependencies come first. Updated project release notes. Additional included changes: * cleaned up the test_dependency_sort.py test module - cleanly separated basic tests, test utilities and test utility tests * added a test to make sure dependency_sort() does not modify its input data * documented that any entries found listed as dependencies, but that do not have their own dependencies listed as well, are logged & ignored
2014-06-27 16:17:25 +02:00 · 2014-06-27 16:17:25 +02:00 · 085165042d
parent 8086107bec
commit 085165042d
5 changed files with 257 additions and 144 deletions
--- a/README.rst
+++ b/README.rst
@ -166,6 +166,15 @@ version 0.7 (development)
  ``suds.sax.element.Element`` with a non-``Element`` parent.
 * ``suds.xsd.sxbase.SchemaObject.content()`` now runs in linear instead of
  quadratic time.
+* ``DepList`` class replaced with a simple ``dependency_sort()`` function taking
+  a single dependency dictionary as input.
+
+  * The original implementation's interface was too heavy-weight with no added
+    value.
+  * Anything tried with the original interface outside the basic use-case
+    covered by ``dependency_sort()`` was actually or could be easily broken.
+  * ``suds.xsd.deplist`` module renamed to ``suds.xsd.depsort``.
+
 * ``suds.cache`` module cleanup.

  * Fixed ``FileCache`` default cache location related security issue. Many
--- a/suds/xsd/deplist.py
+++ b/suds/xsd/deplist.py
@ -1,83 +0,0 @@
-# This program is free software; you can redistribute it and/or modify it under
-# the terms of the (LGPL) GNU Lesser General Public License as published by the
-# Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE. See the GNU Library Lesser General Public License
-# for more details at ( http://www.gnu.org/licenses/lgpl.html ).
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program; if not, write to the Free Software Foundation, Inc.,
-# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-# written by: Jeff Ortel ( jortel@redhat.com )
-
-"""
-The I{deplist} module defines a class for performing topological dependency
-sorting - dependency entries before those that depend on them.
-
-"""
-
-from suds import *
-
-from logging import getLogger
-log = getLogger(__name__)
-
-
-class DepList:
-    """Dependency solving list."""
-
-    def __init__(self):
-        self.__index = {}
-
-    def add(self, *items):
-        """
-        Add items to be sorted.
-
-        Items are tuples: (object, (deps,))
-
-        @param items: One or more items to be added.
-        @type items: I{item}
-
-        """
-        self.__index.update(items)
-
-    def sort(self):
-        """
-        Sort the list based on dependencies.
-
-        If B is directly or indirectly dependent on A and they are not both a
-        part of the same dependency cycle (i.e. then A is neither directly nor
-        indirectly dependent on B) then A needs to come before B.
-
-        If A and B are a part of the same dependency cycle, i.e. if they are
-        both directly or indirectly dependent on each other, then it does not
-        matter which comes first.
-
-        Result contains the same data objects (object + dependency collection)
-        as given on input, but packaged in different items/tuples, i.e. the
-        returned items will 'equal' but not 'the same'.
-
-        @return: The sorted items.
-        @rtype: list
-
-        """
-        sorted = []
-        processed = set()
-        for key, deps in self.__index.iteritems():
-            self.__sort_r(sorted, processed, key, deps)
-        return sorted
-
-    def __sort_r(self, sorted, processed, key, deps):
-        """Recursive topological sort implementation."""
-        if key in processed:
-            return
-        processed.add(key)
-        for dep_key in deps:
-            dep_deps = self.__index.get(dep_key)
-            if dep_deps is None:
-                log.debug('"%s" not found, skipped', Repr(dep_key))
-                continue
-            self.__sort_r(sorted, processed, dep_key, dep_deps)
-        sorted.append((key, deps))
--- a/suds/xsd/depsort.py
+++ b/suds/xsd/depsort.py
@ -0,0 +1,71 @@
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the (LGPL) GNU Lesser General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Library Lesser General Public License
+# for more details at ( http://www.gnu.org/licenses/lgpl.html ).
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# written by: Jeff Ortel ( jortel@redhat.com )
+
+"""
+Dependency/topological sort implementation.
+
+"""
+
+from suds import *
+
+from logging import getLogger
+log = getLogger(__name__)
+
+
+def dependency_sort(dependency_tree):
+    """
+    Sorts items 'dependencies first' in a given dependency tree.
+
+    A dependency tree is a dictionary mapping an object to a collection of its
+    dependency objects.
+
+    Result is a properly sorted list of items, where each item is a 2-tuple
+    containing an object and its dependency list, as given in the input
+    dependency tree.
+
+    If B is directly or indirectly dependent on A and they are not both a part
+    of the same dependency cycle (i.e. A is neither directly nor
+    indirectly dependent on B) then A needs to come before B.
+
+    If A and B are a part of the same dependency cycle, i.e. if they are both
+    directly or indirectly dependent on each other, then it does not matter
+    which comes first.
+
+    Any entries found listed as dependencies, but that do not have their own
+    dependencies listed as well, are logged & ignored.
+
+    @return: The sorted items.
+    @rtype: list
+
+    """
+    sorted = []
+    processed = set()
+    for key, deps in dependency_tree.iteritems():
+        _sort_r(sorted, processed, key, deps, dependency_tree)
+    return sorted
+
+
+def _sort_r(sorted, processed, key, deps, dependency_tree):
+    """Recursive topological sort implementation."""
+    if key in processed:
+        return
+    processed.add(key)
+    for dep_key in deps:
+        dep_deps = dependency_tree.get(dep_key)
+        if dep_deps is None:
+            log.debug('"%s" not found, skipped', Repr(dep_key))
+            continue
+        _sort_r(sorted, processed, dep_key, dep_deps, dependency_tree)
+    sorted.append((key, deps))
--- a/suds/xsd/schema.py
+++ b/suds/xsd/schema.py
@ -24,7 +24,7 @@ and targeted denormalization.

 from suds import *
 from suds.xsd import *
-from suds.xsd.deplist import DepList
+from suds.xsd.depsort import dependency_sort
 from suds.xsd.sxbuiltin import *
 from suds.xsd.sxbase import SchemaObject
 from suds.xsd.sxbasic import Factory as BasicFactory
@ -330,14 +330,13 @@ class Schema(UnicodeMixin):
        indexes = {}
        for child in self.children:
            child.content(all)
-        deplist = DepList()
+        dependencies = {}
        for x in all:
            x.qualify()
            midx, deps = x.dependencies()
-            item = (x, tuple(deps))
-            deplist.add(item)
+            dependencies[x] = deps
            indexes[x] = midx
-        for x, deps in deplist.sort():
+        for x, deps in dependency_sort(dependencies):
            midx = indexes.get(x)
            if midx is None:
                continue
--- a/tests/test_dependency_sort.py
+++ b/tests/test_dependency_sort.py
@ -26,12 +26,13 @@ import testutils
 if __name__ == "__main__":
    testutils.run_using_pytest(globals())

-from suds.xsd.deplist import DepList
+from suds.xsd.depsort import dependency_sort

 import pytest
-
 from six import iteritems

+import copy
+

 # some of the tests in this module make sense only with assertions enabled
 # (note though that pytest's assertion rewriting technique, as used by default
@ -46,28 +47,121 @@ else:
    assertions_enabled = False


-def test_dependency_sort():
-    # f --+-----+-----+
-    # |   |     |     |
-    # |   v     v     v
-    # |   e --> d --> c --> b
-    # |   |           |     |
-    # +---+-----------+-----+--> a --> x
-    dependency_list = [
-        ("c", ("a", "b")),
-        ("e", ("d", "a")),
-        ("d", ("c",)),
-        ("b", ("a",)),
-        ("f", ("e", "c", "d", "a")),
-        ("a", ("x",)),
-        ("x", ())]
-    input = [x[0] for x in dependency_list]
-    deplist = DepList()
-    deplist.add(*dependency_list)
-    result = deplist.sort()
-    assert sorted(result) == sorted(dependency_list)
-    _assert_dependency_order((x[0] for x in result), dict(dependency_list))
+# shared test data

+# f --+-----+-----+
+# |   |     |     |
+# |   v     v     v
+# |   e --> d --> c --> b
+# |   |           |     |
+# +---+-----------+-----+--> a --> x
+_test_dependency_tree = {
+    "x": (),
+    "a": ("x",),
+    "b": ("a",),
+    "c": ("a", "b"),
+    "d": ("c",),
+    "e": ("d", "a"),
+    "f": ("e", "c", "d", "a")}
+
+
+def test_dependency_sort():
+    dependency_tree = _test_dependency_tree
+    result = dependency_sort(dependency_tree)
+    assert sorted(result) == sorted(iteritems(dependency_tree))
+    _assert_dependency_order((x[0] for x in result), dependency_tree)
+
+
+def test_dependency_sort_does_not_mutate_input():
+    dependency_tree = _test_dependency_tree
+
+    # save the original dependency tree structure information
+    expected_deps = {}
+    expected_deps_ids = {}
+    for x, y in iteritems(dependency_tree):
+        expected_deps[x] = copy.copy(y)
+        expected_deps_ids[id(x)] = id(y)
+
+    # run the dependency sort
+    dependency_sort(dependency_tree)
+
+    # verify that the dependency tree structure is unchanged
+    assert len(dependency_tree) == len(expected_deps)
+    for key, deps in iteritems(dependency_tree):
+        # same deps for each key
+        assert id(deps) == expected_deps_ids[id(key)]
+        # deps structure compare with the original copy
+        assert deps == expected_deps[key]
+        # explicit deps content id matching just in case the container's __eq__
+        # is not precise enough
+        _assert_same_content_set(deps, expected_deps[key])
+
+
+###############################################################################
+#
+# Test utilities.
+#
+###############################################################################
+
+def _assert_dependency_order(sequence, dependencies):
+    """
+    Assert that a sequence is ordered dependencies first.
+
+    The only way an earlier entry is allowed to have a later entry as its
+    dependency is if they are both part of the same dependency cycle.
+
+    """
+    sequence = list(sequence)
+    dependency_closure = _transitive_dependency_closure(dependencies)
+    for i, a in enumerate(sequence):
+        for b in sequence[i + 1:]:
+            a_dependent_on_b = b in dependency_closure[a]
+            b_dependent_on_a = a in dependency_closure[b]
+            assert b_dependent_on_a or not a_dependent_on_b
+
+
+def _assert_same_content_set(lhs, rhs):
+    """Assert that two iterables have the same content (order independent)."""
+    counter_lhs = _counter(lhs)
+    counter_rhs = _counter(rhs)
+    assert counter_lhs == counter_rhs
+
+
+def _counter(iterable):
+    """Return an {id: count} dictionary for all items from `iterable`."""
+    counter = {}
+    for x in iterable:
+        counter[id(x)] = counter.setdefault(id(x), 0) + 1
+    return counter
+
+
+def _transitive_dependency_closure(dependencies):
+    """
+    Returns a transitive dependency closure.
+
+    If target A is dependent on target B, and target B is in turn dependent on
+    target C, then target A is also implicitly dependent on target C. A
+    transitive dependency closure is an expanded dependency collection so that
+    in it all such implicit dependencies have been explicitly specified.
+
+    """
+    def clone(deps):
+        return dict((k, set(v)) for k, v in iteritems(deps))
+    closure = None
+    new = clone(dependencies)
+    while new != closure:
+        closure = clone(new)
+        for k, deps in iteritems(closure):
+            for dep in deps:
+                new[k] |= closure[dep]
+    return closure
+
+
+###############################################################################
+#
+# Test utility tests.
+#
+###############################################################################

@pytest.mark.skipif(not assertions_enabled, reason="assertions disabled")
@pytest.mark.parametrize("sequence, dependencies", (
@ -106,40 +200,63 @@ def test_assert_dependency_order__valid(sequence, dependencies):
    _assert_dependency_order(sequence, dependencies)


-def _assert_dependency_order(sequence, dependencies):
-    """
-    Assert that a sequence is ordered dependencies first.
-
-    The only way an earlier entry is allowed to have a later entry as its
-    dependency is if they are both part of the same dependency cycle.
-
-    """
-    sequence = list(sequence)
-    dependency_closure = _transitive_dependency_closure(dependencies)
-    for i, a in enumerate(sequence):
-        for b in sequence[i + 1:]:
-            a_dependent_on_b = b in dependency_closure[a]
-            b_dependent_on_a = a in dependency_closure[b]
-            assert b_dependent_on_a or not a_dependent_on_b
+@pytest.mark.skipif(not assertions_enabled, reason="assertions disabled")
+@pytest.mark.parametrize("lhs, rhs", (
+    # empty
+    #    ([1, 2.0, 6], [1, 2, 6]),
+    ((), (1,)),
+    ([2], []),
+    ([], (4, 2)),
+    ([], (x for x in [8, 4])),
+    ((x for x in [1, 1]), []),
+    # without duplicates
+    ([1, 2, 3], [1, 2, 4]),
+    ([1, 2, 3], [1, 2]),
+    ([1, 2, 3], [1, 4]),
+    ([0], [0.0]),
+    ([0], [0.0]),
+    # with duplicates
+    ([1, 1], [1]),
+    ((x for x in [1, 1]), [1]),
+    ([1, 1], [1, 2, 1]),
+    ([1, 1, 2, 2], [1, 2, 1]),
+    # different object ids
+    ([object()], [object()])))
+def test_assert_same_content_set__invalid(lhs, rhs):
+    pytest.raises(AssertionError, _assert_same_content_set, lhs, rhs)


-def _transitive_dependency_closure(dependencies):
-    """
-    Returns a transitive dependency closure.
+@pytest.mark.parametrize("lhs, rhs", (
+    # empty
+    ((), ()),
+    ([], []),
+    ([], ()),
+    ([], (x for x in [])),
+    ((x for x in []), []),
+    # matching without duplicates
+    ([1, 2, 6], [1, 2, 6]),
+    ([1, 2, 6], [6, 2, 1]),
+    # matching with duplicates
+    ([1, 2, 2, 6], [6, 2, 1, 2]),
+    # matching object ids
+    ([_assert_same_content_set], [_assert_same_content_set])))
+def test_assert_same_content_set__valid(lhs, rhs):
+    _assert_same_content_set(lhs, rhs)

-    If target A is dependent on target B, and target B is in turn dependent on
-    target C, then target A is also implicitly dependent on target C. A
-    transitive dependency closure is an expanded dependency collection so that
-    in it all such implicit dependencies have been explicitly specified.

-    """
-    def clone(deps):
-        return dict((k, set(v)) for k, v in iteritems(deps))
-    closure = None
-    new = clone(dependencies)
-    while new != closure:
-        closure = clone(new)
-        for k, deps in iteritems(closure):
-            for dep in deps:
-                new[k] |= closure[dep]
-    return closure
+def test_counter():
+    a = object()
+    b = object()
+    c = object()
+    d = object()
+    input = [a, b, b, c, c, d, a, a, a, d, b, b, b, b, b, a, d]
+    result = _counter(input)
+    assert len(result) == 4
+    assert result[id(a)] == input.count(a)
+    assert result[id(b)] == input.count(b)
+    assert result[id(c)] == input.count(c)
+    assert result[id(d)] == input.count(d)
+
+
+def test_counter__empty():
+    assert _counter([]) == {}