snapshot: store a patch instead of serialization (#57299)
gitea-wip/wcs/pipeline/head There was a failure building this commit
Details
gitea-wip/wcs/pipeline/head There was a failure building this commit
Details
This commit is contained in:
parent
897dc44bc3
commit
0565ffe5d3
|
@ -66,28 +66,55 @@ def test_snapshot_basics(pub):
|
|||
formdef.fields = []
|
||||
formdef.store()
|
||||
|
||||
carddef = CardDef()
|
||||
carddef.name = 'testcard'
|
||||
carddef.fields = []
|
||||
carddef.store()
|
||||
# first occurrence, complete snapshot stored
|
||||
assert pub.snapshot_class.count() == 1
|
||||
snapshot1 = pub.snapshot_class.get_latest('formdef', formdef.id)
|
||||
assert snapshot1.serialization is not None
|
||||
assert '>testform<' in snapshot1.serialization
|
||||
assert snapshot1.patch is None
|
||||
assert snapshot1.instance # possible to restore
|
||||
|
||||
# no changes
|
||||
formdef.store()
|
||||
assert pub.snapshot_class.count() == 1
|
||||
|
||||
# patch only
|
||||
formdef.name = 'testform2'
|
||||
formdef.store()
|
||||
assert pub.snapshot_class.count() == 2
|
||||
|
||||
carddef.name = 'testcard2'
|
||||
carddef.store()
|
||||
snapshot2 = pub.snapshot_class.get_latest('formdef', formdef.id)
|
||||
assert snapshot2.serialization is None
|
||||
assert '>testform2<' in snapshot2.patch
|
||||
assert snapshot2.instance # possible to restore
|
||||
|
||||
data_source = NamedDataSource(name='foobar')
|
||||
data_source.data_source = {'type': 'formula', 'value': repr([('1', 'un'), ('2', 'deux')])}
|
||||
data_source.store()
|
||||
# no diff with latest snap but label is given
|
||||
pub.snapshot_class.snap(instance=formdef, label="foo bar")
|
||||
assert pub.snapshot_class.count() == 3
|
||||
snapshot3 = pub.snapshot_class.get_latest('formdef', formdef.id)
|
||||
assert snapshot3.serialization is None
|
||||
assert '>testform2<' in snapshot3.patch
|
||||
assert snapshot2.patch == snapshot3.patch
|
||||
assert snapshot3.instance # possible to restore
|
||||
|
||||
# patch is longer than serialization, store serialization
|
||||
formdef.name = 'testform3'
|
||||
formdef.fields = [StringField(id=str(i), label='Test %s' % i, type='string') for i in range(0, 10)]
|
||||
formdef.store()
|
||||
assert pub.snapshot_class.count() == 4
|
||||
snapshot4 = pub.snapshot_class.get_latest('formdef', formdef.id)
|
||||
assert snapshot4.serialization is not None
|
||||
assert '>testform3<' in snapshot4.serialization
|
||||
assert snapshot4.patch is None
|
||||
assert snapshot4.instance # possible to restore
|
||||
|
||||
# no diff with latest snap but label is given
|
||||
pub.snapshot_class.snap(instance=formdef, label="foo bar")
|
||||
assert pub.snapshot_class.count() == 5
|
||||
|
||||
# check we got correct data in the serializations
|
||||
snapshot = pub.snapshot_class.get_latest('formdef', formdef.id)
|
||||
assert '>testform2<' in snapshot.serialization
|
||||
|
||||
snapshot = pub.snapshot_class.get_latest('carddef', carddef.id)
|
||||
assert '>testcard2<' in snapshot.serialization
|
||||
snapshot5 = pub.snapshot_class.get_latest('formdef', formdef.id)
|
||||
assert snapshot5.serialization is None
|
||||
assert snapshot5.patch == '' # no difference with latest snap, which has a serialization
|
||||
assert snapshot5.instance # possible to restore
|
||||
|
||||
|
||||
def test_snapshot_instance(pub):
|
||||
|
@ -117,7 +144,8 @@ def test_snapshot_instance(pub):
|
|||
snapshots = pub.snapshot_class.select_object_history(formdef)
|
||||
assert len(snapshots) == 10
|
||||
for i in range(10):
|
||||
assert snapshots[i].serialization is None
|
||||
assert snapshots[i].serialization is None # not loaded
|
||||
assert snapshots[i].patch is None # not loaded
|
||||
assert pub.snapshot_class.get(snapshots[i].id).instance.name == 'testform %s' % (9 - i)
|
||||
|
||||
snapshots = pub.snapshot_class.select_object_history(carddef)
|
||||
|
@ -703,88 +731,6 @@ def test_snapshot_workflow_variable(pub):
|
|||
assert 'sortable readonly' in resp.text
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def size_limit(pub):
|
||||
pub.snapshot_class.WCS_MAX_LEN = 100
|
||||
yield
|
||||
pub.snapshot_class.WCS_MAX_LEN = 1000000
|
||||
|
||||
|
||||
def test_workflow_snapshot_max_len(pub, size_limit):
|
||||
formdef = FormDef()
|
||||
formdef.name = 'testform'
|
||||
formdef.fields = []
|
||||
formdef.store()
|
||||
|
||||
Workflow.wipe()
|
||||
workflow = Workflow(name='test')
|
||||
workflow.store()
|
||||
|
||||
another_workflow = Workflow(name='other test')
|
||||
another_workflow.store() # same object_type - check that other instances snapshots are not deleted
|
||||
|
||||
assert formdef.id == workflow.id # same id - check other object_type snapshots are not deleted
|
||||
|
||||
# first one: saved
|
||||
assert pub.snapshot_class.count() == 3
|
||||
first_id = pub.snapshot_class.select(order_by='id')[0].id
|
||||
assert pub.snapshot_class.get(first_id).object_type == 'formdef'
|
||||
assert pub.snapshot_class.get(first_id + 1).object_type == 'workflow'
|
||||
assert pub.snapshot_class.get(first_id + 1).object_id == '1'
|
||||
old_timestamp = pub.snapshot_class.get(first_id + 1).timestamp
|
||||
assert pub.snapshot_class.get(first_id + 2).object_type == 'workflow'
|
||||
assert pub.snapshot_class.get(first_id + 2).object_id == '2'
|
||||
|
||||
# save snapshot
|
||||
pub.snapshot_class.snap(instance=workflow, label="snapshot !")
|
||||
assert pub.snapshot_class.count() == 4
|
||||
assert pub.snapshot_class.get(first_id).object_type == 'formdef'
|
||||
assert pub.snapshot_class.get(first_id + 1).object_type == 'workflow'
|
||||
assert pub.snapshot_class.get(first_id + 1).object_id == '1'
|
||||
assert pub.snapshot_class.get(first_id + 1).label is None
|
||||
assert pub.snapshot_class.get(first_id + 1).timestamp == old_timestamp
|
||||
assert pub.snapshot_class.get(first_id + 1).instance.name == 'test'
|
||||
assert pub.snapshot_class.get(first_id + 2).object_type == 'workflow'
|
||||
assert pub.snapshot_class.get(first_id + 2).object_id == '2'
|
||||
assert pub.snapshot_class.get(first_id + 3).object_type == 'workflow'
|
||||
assert pub.snapshot_class.get(first_id + 3).object_id == '1'
|
||||
assert pub.snapshot_class.get(first_id + 3).label == "snapshot !"
|
||||
assert pub.snapshot_class.get(first_id + 3).instance.name == 'test'
|
||||
|
||||
# no changes
|
||||
workflow.store()
|
||||
assert pub.snapshot_class.count() == 4
|
||||
assert pub.snapshot_class.get(first_id).object_type == 'formdef'
|
||||
assert pub.snapshot_class.get(first_id + 1).object_type == 'workflow'
|
||||
assert pub.snapshot_class.get(first_id + 1).object_id == '1'
|
||||
assert pub.snapshot_class.get(first_id + 1).label is None
|
||||
assert pub.snapshot_class.get(first_id + 1).timestamp == old_timestamp
|
||||
assert pub.snapshot_class.get(first_id + 1).instance.name == 'test'
|
||||
assert pub.snapshot_class.get(first_id + 2).object_type == 'workflow'
|
||||
assert pub.snapshot_class.get(first_id + 2).object_id == '2'
|
||||
assert pub.snapshot_class.get(first_id + 3).object_type == 'workflow'
|
||||
assert pub.snapshot_class.get(first_id + 3).object_id == '1'
|
||||
assert pub.snapshot_class.get(first_id + 3).label == "snapshot !"
|
||||
assert pub.snapshot_class.get(first_id + 3).instance.name == 'test'
|
||||
|
||||
# with changes
|
||||
workflow.name = 'foo bar'
|
||||
workflow.store()
|
||||
assert pub.snapshot_class.count() == 4
|
||||
assert pub.snapshot_class.get(first_id).object_type == 'formdef'
|
||||
assert pub.snapshot_class.get(first_id + 2).object_type == 'workflow'
|
||||
assert pub.snapshot_class.get(first_id + 2).object_id == '2'
|
||||
assert pub.snapshot_class.get(first_id + 3).object_type == 'workflow'
|
||||
assert pub.snapshot_class.get(first_id + 3).object_id == '1'
|
||||
assert pub.snapshot_class.get(first_id + 3).label == "snapshot !"
|
||||
assert pub.snapshot_class.get(first_id + 3).instance.name == 'test'
|
||||
assert pub.snapshot_class.get(first_id + 4).object_type == 'workflow'
|
||||
assert pub.snapshot_class.get(first_id + 4).object_id == '1'
|
||||
assert pub.snapshot_class.get(first_id + 4).label is None
|
||||
assert pub.snapshot_class.get(first_id + 4).timestamp > old_timestamp
|
||||
assert pub.snapshot_class.get(first_id + 4).instance.name == 'foo bar'
|
||||
|
||||
|
||||
def test_pickle_erroneous_snapshot_object(pub):
|
||||
# check snapshot object attribute is not restored
|
||||
formdef = FormDef()
|
||||
|
|
|
@ -117,7 +117,7 @@ class SnapshotDirectory(Directory):
|
|||
self.snapshot.timestamp.strftime('%Y%m%d-%H%M'),
|
||||
),
|
||||
)
|
||||
return '<?xml version="1.0"?>\n' + self.snapshot.serialization
|
||||
return '<?xml version="1.0"?>\n' + self.snapshot.get_serialization()
|
||||
|
||||
def restore(self):
|
||||
form = Form(enctype='multipart/form-data')
|
||||
|
|
156
wcs/snapshots.py
156
wcs/snapshots.py
|
@ -14,13 +14,14 @@
|
|||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import difflib
|
||||
import re
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from django.utils.timezone import now
|
||||
from quixote import get_publisher, get_session
|
||||
|
||||
from wcs.qommon import _, misc
|
||||
from wcs.qommon.storage import Null
|
||||
|
||||
|
||||
class UnknownUser:
|
||||
|
@ -28,6 +29,97 @@ class UnknownUser:
|
|||
return str(_('unknown user'))
|
||||
|
||||
|
||||
def indent(tree, space=" ", level=0):
    """Insert whitespace into an XML tree, in place, so it serializes one element per line.

    Backport of ``xml.etree.ElementTree.indent`` (Python 3.9).

    :param tree: an ``Element``, or an ``ElementTree`` whose root is used.
    :param space: whitespace string added once per nesting depth.
    :param level: depth the root element is considered to sit at (>= 0).
    :raises ValueError: if ``level`` is negative.

    Text and tails that already hold non-whitespace content are left
    untouched; an element without children is returned unchanged.
    """
    root = tree.getroot() if isinstance(tree, ET.ElementTree) else tree
    if level < 0:
        raise ValueError(f"Initial indentation level must be >= 0, got {level}")
    if not len(root):
        return

    # pads[d] is the newline + whitespace used at relative depth d; the
    # strings are built once per depth and shared by every element there.
    pads = ["\n" + level * space]

    def _is_blank(value):
        # None, empty and whitespace-only strings may all be overwritten
        return not value or not value.strip()

    def _indent_children(elem, depth):
        inner_depth = depth + 1
        if inner_depth >= len(pads):
            # first time this depth is reached: derive it from the parent's
            pads.append(pads[depth] + space)
        inner_pad = pads[inner_depth]

        if _is_blank(elem.text):
            elem.text = inner_pad

        children = list(elem)
        for child in children:
            if len(child):
                _indent_children(child, inner_depth)
            if _is_blank(child.tail):
                child.tail = inner_pad

        # the closing tag of elem sits one step back: dedent after the last
        # child, unless its tail carries real text.
        last = children[-1]
        if not last.tail.strip():
            last.tail = pads[depth]

    _indent_children(root, 0)
|
||||
|
||||
|
||||
# marker emitted after a diff line whose text has no trailing newline
_no_eol = "\\ No newline at end of file"
# hunk header, e.g. "@@ -12,3 +14 @@"; groups: old start, old length, new start, new length
_hdr_pat = re.compile(r"^@@ -(\d+),?(\d+)? \+(\d+),?(\d+)? @@$")


def make_patch(a, b):
    """
    Get unified string diff between two strings. Trims top two lines.
    Returns empty string if strings are identical.

    The diff is produced without context lines (n=0).  A line lacking a
    trailing newline is followed by a "\\ No newline at end of file" marker
    line, so that apply_patch() can restore it exactly.
    """
    diffs = difflib.unified_diff(a.splitlines(True), b.splitlines(True), n=0)
    # drop the '---' / '+++' file header; nothing is produced at all when
    # both strings are identical, hence the default on next().
    next(diffs, None)
    next(diffs, None)
    return ''.join(d if d.endswith('\n') else d + '\n' + _no_eol + '\n' for d in diffs)


def apply_patch(s, patch, revert=False):
    """
    Apply patch to string s to recover newer string.
    If revert is True, treat s as the newer string, recover older string.

    patch is expected in the format produced by make_patch(); leading
    '---' / '+++' header lines are tolerated.  An empty patch returns s
    unchanged.

    Raises ValueError if a hunk header cannot be parsed or refers to a line
    outside s (or before the position already consumed).
    """
    source = s.splitlines(True)
    lines = patch.splitlines(True)
    out = []  # collected pieces, joined once at the end
    i = pos = 0  # i: index in patch lines, pos: index in source lines
    # forward: position with the old-file numbers and emit '+' lines;
    # revert: position with the new-file numbers and emit '-' lines.
    (midx, sign) = (3, '-') if revert else (1, '+')
    while i < len(lines) and lines[i].startswith(("---", "+++")):
        i += 1  # skip header lines
    while i < len(lines):
        m = _hdr_pat.match(lines[i])
        if not m:
            raise ValueError("Bad patch -- regex mismatch [line " + str(i) + "]")
        # a zero-length range names the line *before* the insertion point,
        # so one more source line has to be copied through in that case.
        target = int(m.group(midx)) - 1 + (m.group(midx + 1) == '0')
        if pos > target or target > len(source):
            raise ValueError("Bad patch -- bad line num [line " + str(i) + "]")
        out.extend(source[pos:target])
        pos = target
        i += 1
        while i < len(lines) and lines[i][0] != '@':
            if i + 1 < len(lines) and lines[i + 1][0] == '\\':
                # "no newline" marker follows: strip the newline make_patch added
                line = lines[i][:-1]
                i += 2
            else:
                line = lines[i]
                i += 1
            if line:
                if line[0] == sign or line[0] == ' ':
                    out.append(line[1:])
                # every line not on "our" side consumes one source line
                pos += line[0] != sign
    out.extend(source[pos:])
    return ''.join(out)
|
||||
|
||||
|
||||
class Snapshot:
|
||||
id = None
|
||||
object_type = None # (formdef, carddef, blockdef, workflow, data_source, etc.)
|
||||
|
@ -36,14 +128,13 @@ class Snapshot:
|
|||
user_id = None
|
||||
comment = None
|
||||
serialization = None
|
||||
patch = None
|
||||
label = None # (named snapshot)
|
||||
|
||||
# cache
|
||||
_instance = None
|
||||
_user = None
|
||||
|
||||
WCS_MAX_LEN = 1000000
|
||||
|
||||
@classmethod
|
||||
def snap(cls, instance, comment=None, label=None):
|
||||
obj = cls()
|
||||
|
@ -52,18 +143,43 @@ class Snapshot:
|
|||
obj.timestamp = now()
|
||||
if get_session():
|
||||
obj.user_id = get_session().user
|
||||
obj.serialization = ET.tostring(instance.export_to_xml(include_id=True)).decode('utf-8')
|
||||
tree = instance.export_to_xml(include_id=True)
|
||||
obj.serialization = ET.tostring(tree).decode('utf-8')
|
||||
obj.comment = str(comment) if comment else None
|
||||
obj.label = label
|
||||
latest = cls.get_latest(obj.object_type, obj.object_id)
|
||||
if label is not None or latest is None or obj.serialization != latest.serialization:
|
||||
# save snapshot if there are changes or an explicit label was
|
||||
# given.
|
||||
if label is None and len(obj.serialization) > cls.WCS_MAX_LEN:
|
||||
# keep only latest snapshot for big objects
|
||||
# (typically workflows with embedded documents)
|
||||
for old_snapshot in cls.select_object_history(instance, clause=[Null('label')]):
|
||||
cls.remove_object(old_snapshot.id)
|
||||
|
||||
latest_complete = cls.get_latest(obj.object_type, obj.object_id, complete=True)
|
||||
if latest_complete is None:
|
||||
# no complete snapshot, store it, with serialization and no patch
|
||||
obj.store()
|
||||
return
|
||||
|
||||
# get patch between latest serialization and current instance
|
||||
# indent xml to minimize patch
|
||||
latest_tree = ET.fromstring(latest_complete.serialization)
|
||||
indent(tree)
|
||||
indent(latest_tree)
|
||||
patch = make_patch(ET.tostring(latest_tree).decode('utf-8'), ET.tostring(tree).decode('utf-8'))
|
||||
# should we store a snapshot ?
|
||||
store_snapshot = True
|
||||
if label is None:
|
||||
# compare with patch of latest snapshot
|
||||
latest = cls.get_latest(obj.object_type, obj.object_id)
|
||||
if latest.patch and patch == latest.patch:
|
||||
# previous snapshot contains a patch (but no serialization)
|
||||
# and the current patch is the same as in the previous snapshot
|
||||
store_snapshot = False
|
||||
elif latest.serialization and not patch:
|
||||
# previous snapshot contains a serialization (but no patch)
|
||||
# and there is no difference (no patch)
|
||||
store_snapshot = False
|
||||
|
||||
if store_snapshot:
|
||||
if len(obj.serialization) > len(patch):
|
||||
# serialization is bigger than patch, store patch
|
||||
obj.serialization = None
|
||||
obj.patch = patch
|
||||
# else: keep serialization and ignore patch
|
||||
obj.store()
|
||||
|
||||
def get_object_class(self):
|
||||
|
@ -80,10 +196,22 @@ class Snapshot:
|
|||
return klass
|
||||
raise KeyError('no class for object type: %s' % self.object_type)
|
||||
|
||||
def get_serialization(self):
|
||||
# there is a complete serialization
|
||||
if self.serialization:
|
||||
return self.serialization
|
||||
|
||||
# get latest version with serialization
|
||||
latest_complete = self.__class__.get_latest(self.object_type, self.object_id, complete=True)
|
||||
latest_tree = ET.fromstring(latest_complete.serialization)
|
||||
indent(latest_tree)
|
||||
serialization = apply_patch(ET.tostring(latest_tree).decode('utf-8'), self.patch or '')
|
||||
return serialization
|
||||
|
||||
@property
|
||||
def instance(self):
|
||||
if self._instance is None:
|
||||
tree = ET.fromstring(self.serialization)
|
||||
tree = ET.fromstring(self.get_serialization())
|
||||
self._instance = self.get_object_class().import_from_xml_tree(
|
||||
tree,
|
||||
include_id=True,
|
||||
|
|
24
wcs/sql.py
24
wcs/sql.py
|
@ -1057,6 +1057,7 @@ def do_snapshots_table():
|
|||
user_id VARCHAR,
|
||||
comment TEXT,
|
||||
serialization TEXT,
|
||||
patch TEXT,
|
||||
label VARCHAR
|
||||
)'''
|
||||
% table_name
|
||||
|
@ -1069,6 +1070,10 @@ def do_snapshots_table():
|
|||
)
|
||||
existing_fields = {x[0] for x in cur.fetchall()}
|
||||
|
||||
# migrations
|
||||
if 'patch' not in existing_fields:
|
||||
cur.execute('''ALTER TABLE %s ADD COLUMN patch TEXT''' % table_name)
|
||||
|
||||
needed_fields = {x[0] for x in Snapshot._table_static_fields}
|
||||
|
||||
# delete obsolete fields
|
||||
|
@ -2959,9 +2964,10 @@ class Snapshot(SqlMixin, wcs.snapshots.Snapshot):
|
|||
('user_id', 'varchar'),
|
||||
('comment', 'text'),
|
||||
('serialization', 'text'),
|
||||
('patch', 'text'),
|
||||
('label', 'varchar'),
|
||||
]
|
||||
_table_select_skipped_fields = ['serialization']
|
||||
_table_select_skipped_fields = ['serialization', 'patch']
|
||||
|
||||
@guard_postgres
|
||||
@invalidate_substitution_cache
|
||||
|
@ -3018,13 +3024,16 @@ class Snapshot(SqlMixin, wcs.snapshots.Snapshot):
|
|||
return []
|
||||
|
||||
@classmethod
|
||||
def get_latest(cls, object_type, object_id):
|
||||
def get_latest(cls, object_type, object_id, complete=False):
|
||||
conn, cur = get_connection_and_cursor()
|
||||
sql_statement = '''SELECT id FROM snapshots
|
||||
WHERE object_type = %(object_type)s
|
||||
AND object_id = %(object_id)s
|
||||
WHERE object_type = %%(object_type)s
|
||||
AND object_id = %%(object_id)s
|
||||
%s
|
||||
ORDER BY timestamp DESC
|
||||
LIMIT 1'''
|
||||
LIMIT 1''' % (
|
||||
'AND serialization IS NOT NULL' if complete else ''
|
||||
)
|
||||
cur.execute(sql_statement, {'object_type': object_type, 'object_id': object_id})
|
||||
row = cur.fetchone()
|
||||
conn.commit()
|
||||
|
@ -3441,7 +3450,7 @@ def get_period_total(
|
|||
# latest migration, number + description (description is not used
|
||||
# programmatically but will make sure git conflicts if two migrations are
|
||||
# separately added with the same number)
|
||||
SQL_LEVEL = (53, 'add kind column on logged_errors table')
|
||||
SQL_LEVEL = (54, 'add patch column on snapshot table')
|
||||
|
||||
|
||||
def migrate_global_views(conn, cur):
|
||||
|
@ -3617,8 +3626,9 @@ def migrate():
|
|||
continue
|
||||
for formdata in formdef.data_class().select_iterator():
|
||||
formdata._set_auto_fields(cur) # build digests
|
||||
if sql_level < 42:
|
||||
if sql_level < 54:
|
||||
# 42: create snapshots table
|
||||
# 54: add patch column
|
||||
do_snapshots_table()
|
||||
if sql_level < 53:
|
||||
# 47: store LoggedErrors in SQL
|
||||
|
|
Loading…
Reference in New Issue