utils: add a batch_queryset method to load large prefetched queryset without exhausting memory (#13587)

Benjamin Dauvergne 2018-05-02 15:37:12 +02:00
parent 8964303359
commit 516cb4f890
1 changed file with 16 additions and 1 deletion


@@ -10,7 +10,7 @@ import datetime
 import copy
 from functools import wraps
-from itertools import islice, chain
+from itertools import islice, chain, count
 from importlib import import_module
@@ -770,9 +770,24 @@ def batch(iterable, size):
     sourceiter = iter(iterable)
     while True:
         batchiter = islice(sourceiter, size)
         # call next() at least one time to advance; if the caller does not
         # consume the returned iterators, sourceiter will never be exhausted.
         yield chain([batchiter.next()], batchiter)
 
 
+def batch_queryset(qs, size=1000):
+    '''Batch a prefetched, potentially very large queryset; it's a middle
+    ground between using .iterator(), which cannot be prefetched, and
+    prefetching a full table, which can take a large amount of memory.
+    '''
+    for i in count(0):
+        chunk = qs[i * size:(i + 1) * size]
+        if not chunk:
+            break
+        for row in chunk:
+            yield row
+
+
 def lower_keys(d):
     '''Convert all keys in dictionary d to lowercase'''
     return dict((key.lower(), value) for key, value in d.iteritems())
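
For illustration, a minimal usage sketch (the User model and its 'roles' relation are hypothetical, not part of this commit): each slice is evaluated as its own queryset, so Django runs the prefetch queries per batch and memory stays bounded by the batch size, whereas .iterator() would skip the prefetch entirely.

    # hypothetical model and relation names, for illustration only
    qs = User.objects.prefetch_related('roles').order_by('pk')
    for user in batch_queryset(qs, size=500):
        roles = list(user.roles.all())  # served from the per-batch prefetch cache

Note that slicing translates to LIMIT/OFFSET queries, so the queryset should carry a stable ordering (hence the order_by('pk') above); without one, rows can be skipped or repeated across batches if the database returns them in a different order each time.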