wcs/wcs/qommon/storage.py

779 lines
28 KiB
Python

# w.c.s. - web application for online forms
# Copyright (C) 2005-2010 Entr'ouvert
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
import errno
import operator
import os
import time
import pickle
import os.path
import shutil
import sys
import tempfile
from django.utils import six
from django.utils.encoding import force_bytes
from django.utils.six.moves import _thread
from .vendor import locket
from quixote import get_publisher
from . import PICKLE_KWARGS
def cache_umask():
global process_umask
process_umask = os.umask(0)
os.umask(process_umask)
# cache umask when loading up the module
cache_umask()
def _take(objects, limit, offset=0):
for y in objects:
if offset:
offset -= 1
continue
if limit:
limit -= 1
elif limit == 0:
break
elif limit is None:
pass
yield y
def lax_int(s):
try:
return int(s)
except ValueError:
return -1
def fix_key(k):
# insure key can be inserted in filesystem
if not k: return k
return str(k).replace('/', '-')
def atomic_write(path, content, async_op=False):
'''Rewrite a complete file automatically, that is write to new file with
temporary name, fsync, then rename to final name. Use threads to remove blocking.'''
def doit():
dirname = os.path.dirname(path)
fd, temp = tempfile.mkstemp(dir=dirname,
prefix='.tmp-'+os.path.basename(path)+'-')
os.fchmod(fd, 0o666 & ~process_umask)
f = os.fdopen(fd, "wb")
if hasattr(content, 'read'):
# file pointer
def read100k():
return content.read(100000)
for piece in iter(read100k, b''):
f.write(piece)
else:
f.write(content)
f.flush()
os.fsync(f.fileno())
f.close()
os.rename(temp, path)
if async_op:
_thread.start_new_thread(doit, ())
else:
doit()
class Criteria(object):
def __init__(self, attribute, value):
self.attribute = attribute
self.value = value
# Python 3 requires comparisons to disparate types, this means we need
# to create a null value of the appropriate type, so None values can
# still be sorted.
self.typed_none = ''
if isinstance(self.value, bool):
self.typed_none = False
elif isinstance(self.value, six.integer_types + (float,)):
self.typed_none = -sys.maxsize
elif isinstance(self.value, time.struct_time):
self.typed_none = time.gmtime(-10**10) # 1653
def build_lambda(self):
return lambda x: self.op(getattr(x, self.attribute, None) or self.typed_none, self.value)
class Less(Criteria):
op = operator.lt
class Greater(Criteria):
op = operator.gt
class Equal(Criteria):
op = operator.eq
class NotEqual(Criteria):
op = operator.ne
class LessOrEqual(Criteria):
op = operator.le
class GreaterOrEqual(Criteria):
op = operator.ge
class Contains(Criteria):
op = operator.contains
def build_lambda(self):
return lambda x: self.op(self.value, getattr(x, self.attribute, ''))
class NotContains(Criteria):
op = operator.contains
def build_lambda(self):
return lambda x: not self.op(self.value, getattr(x, self.attribute, ''))
class Intersects(Criteria):
def build_lambda(self):
value = set(self.value)
def func(x):
try:
return value.intersection(set(getattr(x, self.attribute, []) or []))
except KeyError:
# this may happen if used to check a formdata field that didn't
# exist when the formdata was created.
return False
return func
class Or(Criteria):
def __init__(self, criterias, **kwargs):
self.criterias = criterias
def build_lambda(self):
func = lambda x: False
def combine_callables(x1, x2):
return lambda x: x1(x) or x2(x)
for element in self.criterias:
func = combine_callables(func, element.build_lambda())
return func
class And(Criteria):
def __init__(self, criterias, **kwargs):
self.criterias = criterias
def build_lambda(self):
func = lambda x: True
def combine_callables(x1, x2):
return lambda x: x1(x) and x2(x)
for element in self.criterias:
func = combine_callables(func, element.build_lambda())
return func
class ILike(Criteria):
def build_lambda(self):
return lambda x: self.value.lower() in (getattr(x, self.attribute, '') or '').lower()
class FtsMatch(Criteria):
def __init__(self, value):
self.value = value
def build_lambda(self):
raise NotImplementedError()
class NotNull(Criteria):
def __init__(self, attribute):
self.attribute = attribute
def build_lambda(self):
return lambda x: getattr(x, self.attribute, None) is not None
class Null(Criteria):
def __init__(self, attribute):
self.attribute = attribute
def build_lambda(self):
return lambda x: getattr(x, self.attribute, None) is None
def parse_clause(clause):
# creates a callable out of a clause
# (attribute, operator, value)
if callable(clause): # already a callable
return clause
def combine_callables(x1, x2):
return lambda x: x1(x) and x2(x)
func = lambda x: True
for element in clause:
if callable(element):
func = combine_callables(func, element)
else:
func = combine_callables(func, element.build_lambda())
return func
class StorageIndexException(Exception):
pass
class StorableObject(object):
_indexes = None
_hashed_indexes = None
_filename = None # None, unless must be saved to a specific location
def __init__(self, id = None):
self.id = id
@classmethod
def get_table_name(cls):
return cls._names
@classmethod
def get_objects_dir(cls):
return os.path.join(get_publisher().app_dir, cls.get_table_name())
@classmethod
def keys(cls):
if not os.path.exists(cls.get_objects_dir()):
return []
return [fix_key(x) for x in os.listdir(cls.get_objects_dir()) if x[0] != '.']
@classmethod
def values(cls, ignore_errors=False, ignore_migration=True):
values = [cls.get(x, ignore_errors=ignore_errors, ignore_migration=True) for x in cls.keys()]
return [x for x in values if x is not None]
@classmethod
def items(cls):
return [(x, cls.get(x)) for x in cls.keys()]
@classmethod
def count(cls, clause=None):
if clause:
return len(cls.select(clause))
return len(cls.keys())
@classmethod
def select(cls, clause=None, order_by=None, ignore_errors=False,
ignore_migration=False, limit=None, offset=None, iterator=False, **kwargs):
# iterator: only for compatibility with sql select()
keys = cls.keys()
objects = (cls.get(k, ignore_errors=ignore_errors,
ignore_migration=ignore_migration, **kwargs) for k in keys)
if ignore_errors:
objects = (x for x in objects if x is not None)
if clause:
clause_function = parse_clause(clause)
objects = (x for x in objects if clause_function(x))
if order_by:
order_by = str(order_by)
if order_by[0] == '-':
reverse = True
order_by = order_by[1:]
else:
reverse = False
# only list can be sorted
objects = list(objects)
if order_by == 'id':
key_function = lambda x: lax_int(x.id)
elif order_by == 'name':
# proper collation should be done but it's messy to get working
# on all systems so we go the cheap and almost ok way.
from .misc import simplify
key_function = lambda x: simplify(x.name)
elif order_by.endswith('_time'):
typed_none = time.gmtime(-10**10) # 1653
key_function = lambda x: getattr(x, order_by) or typed_none
else:
key_function = lambda x: getattr(x, order_by)
objects.sort(key=key_function)
if reverse:
objects.reverse()
if limit or offset:
objects = _take(objects, limit, offset)
return list(objects)
@classmethod
def select_iterator(cls, **kwargs):
for obj in cls.select(**kwargs):
yield obj
@classmethod
def has_key(cls, id):
filename = os.path.join(cls.get_objects_dir(), fix_key(id))
return os.path.exists(force_bytes(filename, 'utf-8'))
@classmethod
def get_new_id(cls, create=False):
keys = cls.keys()
if not keys:
id = 1
else:
id = max([lax_int(x) for x in keys]) + 1
if id == 0:
id = len(keys)+1
if create:
objects_dir = cls.get_objects_dir()
object_filename = os.path.join(objects_dir, fix_key(id))
try:
fd = os.open(object_filename, os.O_CREAT | os.O_EXCL)
except OSError:
return cls.get_new_id(create=True)
os.close(fd)
return str(id)
@classmethod
def get(cls, id, ignore_errors=False, ignore_migration=False, **kwargs):
if id is None:
if ignore_errors:
return None
else:
raise KeyError()
filename = os.path.join(cls.get_objects_dir(), fix_key(id))
return cls.get_filename(filename, ignore_errors=ignore_errors,
ignore_migration=ignore_migration,
**kwargs)
@classmethod
def get_ids(cls, ids, ignore_errors=False, keep_order=False):
objects = []
for x in ids:
obj = cls.get(x, ignore_errors=ignore_errors)
if obj is not None:
objects.append(obj)
return objects
@classmethod
def get_on_index(cls, id, index, ignore_errors=False, ignore_migration=False):
if not cls._indexes:
raise KeyError()
objects_dir = cls.get_objects_dir()
index_dir = objects_dir + '-' + index
if not os.path.exists(index_dir):
cls.rebuild_indexes()
filename = os.path.join(index_dir, str(fix_key(id)))
return cls.get_filename(filename, ignore_errors=ignore_errors,
ignore_migration=ignore_migration)
@classmethod
def get_ids_with_indexed_value(cls, index, value, auto_fallback=True):
objects_dir = cls.get_objects_dir()
index_dir = os.path.join(objects_dir, '.indexes', str(index))
index_file = os.path.join(index_dir, '%s-%s' % (index, fix_key(value)))
if not os.path.exists(index_dir):
if auto_fallback is False:
raise StorageIndexException()
try:
cls.rebuild_indexes()
except StorageIndexException:
values = cls.select(ignore_errors=True)
return [x for x in values if getattr(x, index) == value]
if not os.path.exists(index_file):
return []
return pickle.load(open(index_file, 'rb'))
@classmethod
def get_with_indexed_value(cls, index, value, ignore_errors = False):
ids = cls.get_ids_with_indexed_value(str(index), str(value))
for x in ids:
obj = cls.get(x, ignore_errors=ignore_errors)
if obj is not None:
yield obj
@classmethod
def storage_load(cls, fd):
if get_publisher() and get_publisher().unpickler_class:
unpickler = get_publisher().unpickler_class
else:
unpickler = pickle.Unpickler
return unpickler(fd, **PICKLE_KWARGS).load()
@classmethod
def get_filename(cls, filename, ignore_errors=False, ignore_migration=False, **kwargs):
try:
fd = open(force_bytes(filename, 'utf-8'), 'rb')
o = cls.storage_load(fd, **kwargs)
except IOError:
if ignore_errors:
return None
raise KeyError()
except ImportError as e:
if ignore_errors:
return None
raise KeyError()
except EOFError as e:
# maybe it's being written to, loop for a while to see
current_position = fd.tell()
fd.close()
for i in range(10):
time.sleep(0.01)
if current_position != os.stat(filename).st_size:
return cls.get_filename(filename, ignore_errors=ignore_errors,
ignore_migration=ignore_migration)
if ignore_errors:
return None
raise KeyError()
o.__class__ = cls
if not ignore_migration:
o.id = str(o.id) # makes sure 'id' is a string
if hasattr(cls, 'migrate'):
o.migrate()
return o
@classmethod
def rebuild_indexes(cls, indexes=[]):
if not (cls._indexes or cls._hashed_indexes):
return
if not indexes:
indexes = (cls._hashed_indexes or []) + (cls._indexes or [])
objects_dir = cls.get_objects_dir()
hashed_indexes = {}
for index in cls._hashed_indexes or []:
if index not in indexes:
continue
index_dir = os.path.join(objects_dir, '.indexes', index)
if not os.path.exists(index_dir):
try:
os.makedirs(index_dir)
except OSError:
raise StorageIndexException()
for object in cls.values(ignore_errors=True, ignore_migration=True):
object_filename = os.path.join(objects_dir, fix_key(object.id))
relative_object_filename = os.path.join('..', cls.get_table_name(), fix_key(object.id))
for index in cls._indexes or []:
if index not in indexes:
continue
if not hasattr(object, index) or getattr(object, index) is None:
continue
index_dir = objects_dir + '-' + index
link_name = os.path.join(index_dir, fix_key(str(getattr(object, index))))
try:
if relative_object_filename:
os.symlink(relative_object_filename, link_name)
else:
os.symlink(object_filename, link_name)
except OSError as exc:
if exc.errno == 2:
os.mkdir(index_dir)
elif exc.errno == 17:
try:
os.unlink(link_name)
except OSError as exc2:
if exc2.errno != 2: # no such file or directory
raise
# link got created in a different process, move
# along.
continue
else:
raise
if relative_object_filename:
os.symlink(relative_object_filename, link_name)
else:
os.symlink(object_filename, link_name)
for index in cls._hashed_indexes or []:
if index not in indexes:
continue
if not hasattr(object, index) or getattr(object, index) is None:
continue
attribute = getattr(object, index)
if type(attribute) is dict:
attribute = attribute.values()
elif type(attribute) not in (tuple, list, set):
attribute = [attribute]
for attr in attribute:
attr_value = fix_key(attr)
index_name = '%s-%s' % (index, attr_value)
if not index_name in hashed_indexes:
hashed_indexes[index_name] = []
hashed_indexes[index_name].append(str(object.id))
for index, content in hashed_indexes.items():
index_key = index.split('-')[0]
if index_key not in indexes:
continue
index_file = os.path.join(objects_dir, '.indexes', index_key, index)
pickle.dump(content, open(index_file, 'wb'), protocol=2)
for index in cls._hashed_indexes or []:
if index not in indexes:
continue
index_dir = os.path.join(objects_dir, '.indexes', index)
for filename in os.listdir(index_dir):
if filename not in hashed_indexes:
os.unlink(os.path.join(index_dir, filename))
def get_object_filename(self):
if self._filename:
if self._filename[0] == '/':
return self._filename
else:
return os.path.join(get_publisher().app_dir, self._filename)
else:
objects_dir = self.get_objects_dir()
return os.path.join(objects_dir, fix_key(self.id))
@classmethod
def storage_dumps(cls, object):
return pickle.dumps(object, protocol=2)
def store(self, async_op=False):
objects_dir = self.get_objects_dir()
new_object = False
if self._filename:
if self._filename[0] == '/':
object_filename = self._filename
relative_object_filename = None
else:
object_filename = os.path.join(get_publisher().app_dir, self._filename)
relative_object_filename = os.path.join('..', self._filename)
else:
if not os.path.exists(objects_dir):
try:
os.mkdir(objects_dir)
except OSError as error:
if error.errno != 17: # 17 == Directory exists
raise
if self.id is None:
self.id = self.get_new_id(create=True)
new_object = True
object_filename = os.path.join(objects_dir, fix_key(self.id))
relative_object_filename = os.path.join('..', self.get_table_name(), fix_key(self.id))
previous_object_value = None
if not new_object and (self._indexes or self._hashed_indexes):
previous_object_value = self.get_filename(object_filename,
ignore_errors=True, ignore_migration=True)
s = self.storage_dumps(self)
atomic_write(object_filename, s, async_op)
# update last modified time
if os.path.exists(objects_dir):
os.utime(objects_dir, None)
with locket.lock_file(objects_dir + '.lock.index'):
try:
self.update_indexes(previous_object_value, relative_object_filename)
except:
# something failed, we can't keep using possibly broken indexes, so
# we notify of the bug and remove the indexes
get_publisher().notify_of_exception(sys.exc_info(), context='[STORAGE]')
self.destroy_indexes()
@classmethod
def destroy_indexes(cls):
objects_dir = cls.get_objects_dir()
directories_to_trash = []
directories_to_wipe = []
for index in cls._indexes or []:
index_dir = objects_dir + '-' + index
directories_to_trash.append(index_dir)
directories_to_trash.append(os.path.join(objects_dir, '.indexes'))
for directory in directories_to_trash:
if not os.path.exists(directory):
continue
i = 0
while True:
trashed_index_name = directory + '.trash-%s' % i
i += 1
try:
os.mkdir(trashed_index_name)
except OSError:
continue
try:
os.rename(directory, os.path.join(trashed_index_name, 'idx'))
except OSError:
continue
directories_to_wipe.append(trashed_index_name)
break
for directory in directories_to_wipe:
shutil.rmtree(directory)
def update_indexes(self, previous_object_value, relative_object_filename):
objects_dir = self.get_objects_dir()
rebuilt_indexes = False
for index in self._indexes or []:
if not hasattr(self, index) or getattr(self, index) is None:
continue
index_dir = objects_dir + '-' + index
link_name = os.path.join(index_dir, fix_key(str(getattr(self, index))))
if previous_object_value:
old_link_name = os.path.join(index_dir,
fix_key(str(getattr(previous_object_value, index))))
if os.path.exists(old_link_name):
if old_link_name == link_name:
continue
os.unlink(old_link_name)
try:
if relative_object_filename:
os.symlink(relative_object_filename, link_name)
else:
os.symlink(self.get_object_filename(), link_name)
except OSError as exc:
if exc.errno == 2:
os.mkdir(index_dir)
if not rebuilt_indexes:
# perhaps index dir got removed; rebuild it before
# adding elements to it.
self.rebuild_indexes()
rebuilt_indexes = True
elif exc.errno == 17:
os.unlink(link_name)
else:
raise
if not rebuilt_indexes:
if relative_object_filename:
os.symlink(relative_object_filename, link_name)
else:
os.symlink(self.get_object_filename(), link_name)
for index in self._hashed_indexes or []:
index_dir = os.path.join(objects_dir, '.indexes', index)
if not os.path.exists(index_dir):
try:
os.makedirs(index_dir)
except OSError as e:
if e.errno == errno.EEXIST: # File exists
pass
old_value = []
if type(getattr(self, index)) is dict:
new_value = getattr(self, index).values()
if previous_object_value:
old_value = getattr(previous_object_value, index)
if old_value is None:
old_value = []
else:
old_value = old_value.values()
elif type(getattr(self, index)) in (tuple, list, set):
new_value = getattr(self, index)
if previous_object_value:
old_value = getattr(previous_object_value, index)
if old_value is None:
old_value = []
else:
new_value = [getattr(self, index)]
if previous_object_value:
old_raw_value = getattr(previous_object_value, index)
if type(old_raw_value) is dict:
old_value = old_raw_value.values()
elif type(old_raw_value) in (tuple, list, set):
old_value = old_raw_value
else:
old_value = [old_raw_value]
for oldv in old_value:
if oldv in new_value:
continue
old_index_name = '%s-%s' % (index, fix_key(oldv))
old_index_file = os.path.join(index_dir, old_index_name)
if os.path.exists(old_index_file):
ids = [str(x) for x in pickle.load(open(old_index_file, 'rb'))]
if str(self.id) in ids:
ids.remove(str(self.id))
pickle.dump(ids, open(old_index_file, 'wb'), protocol=2)
for newv in new_value:
if newv in old_value:
continue
index_name = '%s-%s' % (index, fix_key(newv))
index_file = os.path.join(index_dir, index_name)
if os.path.exists(index_file):
ids = [str(x) for x in pickle.load(open(index_file, 'rb'))]
else:
ids = []
if not str(self.id) in ids:
ids.append(str(self.id))
pickle.dump(ids, open(index_file, 'wb'), protocol=2)
@classmethod
def volatile(cls):
o = cls()
o.id = None
return o
@classmethod
def remove_object(cls, id):
objects_dir = cls.get_objects_dir()
if cls._indexes or cls._hashed_indexes:
object = cls.get(id)
for index in cls._indexes or []:
if not hasattr(object, index) or getattr(object, index) is None:
continue
index_dir = objects_dir + '-' + index
link_name = os.path.join(index_dir, fix_key(str(getattr(object, index))))
try:
os.unlink(link_name)
except OSError:
pass
index_dir = os.path.join(objects_dir, '.indexes')
for index in cls._hashed_indexes or []:
attribute = getattr(object, index)
if type(attribute) not in (tuple, list, set):
attribute = [attribute]
for attr in attribute:
attr_value = fix_key(attr)
index_name = '%s-%s' % (index, attr_value)
index_file = os.path.join(index_dir, index, index_name)
if os.path.exists(index_file):
ids = [str(x) for x in pickle.load(open(index_file, 'rb'))]
if str(object.id) in ids:
ids.remove(str(object.id))
pickle.dump(ids, open(index_file, 'wb'), protocol=2)
os.unlink(os.path.join(objects_dir, fix_key(id)))
def remove_self(self):
self.remove_object(self.id)
@classmethod
def wipe(cls):
tmpdir = tempfile.mkdtemp(prefix='wiping', dir=os.path.join(get_publisher().app_dir))
dirs_to_move = []
objects_dir = cls.get_objects_dir()
dirs_to_move.append(objects_dir)
for index in cls._indexes or []:
index_dir = objects_dir + '-' + index
dirs_to_move.append(index_dir)
for directory in dirs_to_move:
if os.path.exists(directory):
os.rename(directory, os.path.join(tmpdir, os.path.basename(directory)))
shutil.rmtree(tmpdir)
def __repr__(self):
if hasattr(self, 'display_name'):
display_name = '%r ' % self.display_name
elif hasattr(self, 'get_display_name'):
display_name = '%r ' % self.get_display_name()
elif hasattr(self, 'name'):
display_name = '%r ' % self.name
else:
display_name = ''
return '<%s %sid:%s>' % (self.__class__.__name__, display_name, self.id)