# w.c.s. - web application for online forms
# Copyright (C) 2005-2010 Entr'ouvert
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
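
'''Pickle-based storage backend: each object is stored in its own file,
named after its id, in a per-class directory; optional indexes are
maintained as directories of symlinks (_indexes) and as pickled lists
of ids (_hashed_indexes).'''
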
import operator
import os
import time
import pickle
import os.path
import shutil
import sys
import tempfile
try:
import thread as _thread
except ImportError:
import dummy_thread as _thread
from quixote import get_publisher
def _take(objects, limit, offset=0):
    # yield at most `limit` objects, skipping the first `offset` ones;
    # a limit of None means no limit
    for y in objects:
        if offset:
            offset -= 1
            continue
        if limit == 0:
            break
        if limit:
            limit -= 1
        yield y
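# Example: list(_take(iter('abcdef'), limit=2, offset=1)) == ['b', 'c']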
def lax_int(s):
    # convert to int, mapping non-numeric values to -1
    try:
        return int(s)
    except ValueError:
        return -1
def fix_key(k):
    # ensure the key can be used as a filename
    if not k:
        return k
    return str(k).replace('/', '-')
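# Example: fix_key('foo/bar') == 'foo-bar', fix_key(42) == '42'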
def atomic_write(path, content, async=False):
    '''Atomically rewrite a complete file: write the content to a file with
    a temporary name in the same directory, fsync it, then rename it to its
    final name.  Use a thread to avoid blocking when async is True.'''
def doit():
dirname = os.path.dirname(path)
fd, temp = tempfile.mkstemp(dir=dirname,
prefix='.tmp-'+os.path.basename(path)+'-')
f = os.fdopen(fd, "w")
if hasattr(content, 'read'):
# file pointer
def read100k():
return content.read(100000)
for piece in iter(read100k, ''):
f.write(piece)
else:
f.write(content)
f.flush()
os.fsync(f.fileno())
f.close()
os.rename(temp, path)
if async:
_thread.start_new_thread(doit, ())
else:
doit()
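# Usage sketch (hypothetical path, for illustration):
#   atomic_write('/tmp/example.txt', 'some content')
# A concurrent reader sees either the previous content or the new one,
# never a partially written file, since rename() is atomic on POSIX.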
class Criteria(object):
    # base class for filter criteria; subclasses set `op` to a binary
    # operator, applied as op(getattr(object, attribute), value)
    def __init__(self, attribute, value):
        self.attribute = attribute
        self.value = value

    def build_lambda(self):
        return lambda x: self.op(getattr(x, self.attribute), self.value)
class Less(Criteria):
op = operator.lt
class Greater(Criteria):
op = operator.gt
class Equal(Criteria):
op = operator.eq
class NotEqual(Criteria):
op = operator.ne
class LessOrEqual(Criteria):
op = operator.le
class GreaterOrEqual(Criteria):
op = operator.ge
class Contains(Criteria):
    # true when the attribute value is in the given collection (SQL "IN")
    op = operator.contains

    def build_lambda(self):
        return lambda x: self.op(self.value, getattr(x, self.attribute))

class NotContains(Criteria):
    # true when the attribute value is not in the given collection
    op = operator.contains

    def build_lambda(self):
        return lambda x: not self.op(self.value, getattr(x, self.attribute))
class Intersects(Criteria):
    # true when the attribute value (a sequence) shares at least one
    # element with the given sequence
    def build_lambda(self):
        value = set(self.value)
        return lambda x: bool(value.intersection(getattr(x, self.attribute) or []))
class Or(Criteria):
def __init__(self, criterias):
self.criterias = criterias
def build_lambda(self):
func = lambda x: False
def combine_callables(x1, x2):
return lambda x: x1(x) or x2(x)
for element in self.criterias:
func = combine_callables(func, element.build_lambda())
return func
class And(Criteria):
def __init__(self, criterias):
self.criterias = criterias
def build_lambda(self):
func = lambda x: True
def combine_callables(x1, x2):
return lambda x: x1(x) and x2(x)
for element in self.criterias:
func = combine_callables(func, element.build_lambda())
return func
class ILike(Criteria):
    # case-insensitive substring match (like SQL "ILIKE '%value%'")
    def build_lambda(self):
        return lambda x: self.value.lower() in (getattr(x, self.attribute) or '').lower()
def parse_clause(clause):
    # build a single callable out of a clause: either a callable already,
    # or a list of Criteria instances and/or callables that must all
    # match (logical AND)
if callable(clause): # already a callable
return clause
def combine_callables(x1, x2):
return lambda x: x1(x) and x2(x)
func = lambda x: True
for element in clause:
if callable(element):
func = combine_callables(func, element)
else:
func = combine_callables(func, element.build_lambda())
return func
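# Example (hypothetical attribute names, for illustration):
#   clause = [Equal('status', 'open'),
#             Or([ILike('title', 'report'), Intersects('tags', ['urgent'])])]
#   match = parse_clause(clause)
#   match(obj)  # True when obj.status == 'open' and either inner
#               # criterion matches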
class StorageIndexException(Exception):
pass
class StorableObject(object):
    _names = None  # storage directory name; must be set in subclasses
    _indexes = None
    _hashed_indexes = None
    _filename = None # None, unless must be saved to a specific location

    def __init__(self, id=None):
self.id = id
if get_publisher() and not self.id:
self.id = self.get_new_id()
def get_table_name(cls):
return cls._names
get_table_name = classmethod(get_table_name)
def get_objects_dir(cls):
return os.path.join(get_publisher().app_dir, cls.get_table_name())
get_objects_dir = classmethod(get_objects_dir)
def keys(cls):
if not os.path.exists(cls.get_objects_dir()):
return []
return [fix_key(x) for x in os.listdir(cls.get_objects_dir()) if x[0] != '.']
keys = classmethod(keys)
    def values(cls, ignore_errors=False, ignore_migration=True):
        values = [cls.get(x, ignore_errors=ignore_errors,
                          ignore_migration=ignore_migration) for x in cls.keys()]
        return [x for x in values if x is not None]
values = classmethod(values)
def items(cls):
return [(x, cls.get(x)) for x in cls.keys()]
items = classmethod(items)
def count(cls, clause=None):
if clause:
return len(cls.select(clause))
return len(cls.keys())
count = classmethod(count)
def select(cls, clause=None, order_by=None, ignore_errors=False, limit=None, offset=None):
keys = cls.keys()
        objects = (cls.get(k, ignore_errors=ignore_errors) for k in keys)
if ignore_errors:
objects = (x for x in objects if x is not None)
if clause:
clause_function = parse_clause(clause)
objects = (x for x in objects if clause_function(x))
if order_by:
order_by = str(order_by)
if order_by[0] == '-':
reverse = True
order_by = order_by[1:]
else:
reverse = False
# only list can be sorted
objects = list(objects)
if order_by == 'id':
cmp_function = lambda x, y: cmp(lax_int(x.id), lax_int(y.id))
else:
cmp_function = lambda x, y: cmp(getattr(x, order_by), getattr(y, order_by))
objects.sort(cmp_function)
if reverse:
objects.reverse()
if limit or offset:
objects = _take(objects, limit, offset)
return list(objects)
select = classmethod(select)
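    # Usage sketch (hypothetical subclass and attributes, for illustration):
    #   SomeClass.select([NotEqual('status', 'draft')],
    #                    order_by='-receipt_time', limit=10)
    # loads every stored object, filters with the clause, sorts on the
    # given attribute (descending, because of the leading '-'), then
    # keeps the first ten results.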
def has_key(cls, id):
filename = os.path.join(cls.get_objects_dir(), fix_key(id))
return os.path.exists(filename)
has_key = classmethod(has_key)
    def get_new_id(cls, create=False):
        keys = cls.keys()
        if not keys:
            id = 1
        else:
            id = max([lax_int(x) for x in keys]) + 1
            if id == 0:
                # all existing keys were non-numeric (lax_int() gave -1)
                id = len(keys) + 1
        if create:
            # reserve the id by creating its file exclusively; on a
            # collision with a concurrent creation, try again
            objects_dir = cls.get_objects_dir()
            object_filename = os.path.join(objects_dir, fix_key(id))
            try:
                fd = os.open(object_filename, os.O_CREAT | os.O_EXCL)
            except OSError:
                return cls.get_new_id(create=True)
            os.close(fd)
        return str(id)
get_new_id = classmethod(get_new_id)
def get(cls, id, ignore_errors=False, ignore_migration=False):
if id is None:
if ignore_errors:
return None
else:
raise KeyError()
filename = os.path.join(cls.get_objects_dir(), fix_key(id))
return cls.get_filename(filename, ignore_errors=ignore_errors,
ignore_migration=ignore_migration)
get = classmethod(get)
    def get_ids(cls, ids, ignore_errors=False, keep_order=False):
        # note: results always follow the order of `ids`; the keep_order
        # parameter is unused here
        objects = []
        for x in ids:
            obj = cls.get(x, ignore_errors=ignore_errors)
            if obj is not None:
                objects.append(obj)
        return objects
get_ids = classmethod(get_ids)
def get_on_index(cls, id, index, ignore_errors=False, ignore_migration=False):
if not cls._indexes:
raise KeyError()
objects_dir = cls.get_objects_dir()
index_dir = objects_dir + '-' + index
if not os.path.exists(index_dir):
cls.rebuild_indexes()
filename = os.path.join(index_dir, str(fix_key(id)))
return cls.get_filename(filename, ignore_errors=ignore_errors,
ignore_migration=ignore_migration)
get_on_index = classmethod(get_on_index)
def get_ids_with_indexed_value(cls, index, value, auto_fallback=True):
objects_dir = cls.get_objects_dir()
index_dir = os.path.join(objects_dir, '.indexes', str(index))
index_file = os.path.join(index_dir, '%s-%s' % (index, fix_key(value)))
if not os.path.exists(index_dir):
if auto_fallback is False:
raise StorageIndexException()
try:
cls.rebuild_indexes()
except StorageIndexException:
values = cls.select(ignore_errors=True)
return [x for x in values if getattr(x, index) == value]
if not os.path.exists(index_file):
return []
return pickle.load(file(index_file))
get_ids_with_indexed_value = classmethod(get_ids_with_indexed_value)
    def get_with_indexed_value(cls, index, value, ignore_errors=False):
        ids = cls.get_ids_with_indexed_value(str(index), str(value))
        return cls.get_ids(ids, ignore_errors=ignore_errors)
get_with_indexed_value = classmethod(get_with_indexed_value)
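    # Usage sketch (for illustration), assuming a subclass declaring
    # _hashed_indexes = ['user_id']:
    #   objects = SomeClass.get_with_indexed_value('user_id', '42')
    # returns the objects whose user_id is '42', going through the pickled
    # id lists in <objects_dir>/.indexes/user_id/, falling back to a full
    # scan if the index cannot be built.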
def storage_load(cls, fd):
if get_publisher() and get_publisher().unpickler_class:
unpickler = get_publisher().unpickler_class
else:
unpickler = pickle.Unpickler
return unpickler(fd).load()
storage_load = classmethod(storage_load)
def get_filename(cls, filename, ignore_errors=False, ignore_migration=False):
try:
o = cls.storage_load(file(filename))
except IOError:
if ignore_errors:
return None
raise KeyError()
        except (EOFError, ImportError):
if ignore_errors:
return None
raise KeyError()
o.__class__ = cls
if not ignore_migration:
o.id = str(o.id) # makes sure 'id' is a string
if hasattr(cls, 'migrate'):
o.migrate()
return o
get_filename = classmethod(get_filename)
    def rebuild_indexes(cls, indexes=None):
if not (cls._indexes or cls._hashed_indexes):
return
if not indexes:
indexes = (cls._hashed_indexes or []) + (cls._indexes or [])
objects_dir = cls.get_objects_dir()
hashed_indexes = {}
for index in cls._hashed_indexes or []:
if index not in indexes:
continue
index_dir = os.path.join(objects_dir, '.indexes', index)
if not os.path.exists(index_dir):
try:
os.makedirs(index_dir)
except OSError:
raise StorageIndexException()
for object in cls.values(ignore_errors=True, ignore_migration=True):
object_filename = os.path.join(objects_dir, fix_key(object.id))
relative_object_filename = os.path.join('..', cls.get_table_name(), fix_key(object.id))
for index in cls._indexes or []:
if index not in indexes:
continue
if not hasattr(object, index) or getattr(object, index) is None:
continue
index_dir = objects_dir + '-' + index
link_name = os.path.join(index_dir, fix_key(str(getattr(object, index))))
try:
if relative_object_filename:
os.symlink(relative_object_filename, link_name)
else:
os.symlink(object_filename, link_name)
                except OSError, exc:
                    if exc.errno == 2:  # ENOENT: the index dir is missing
                        os.mkdir(index_dir)
                    elif exc.errno == 17:  # EEXIST: a stale link exists
                        os.unlink(link_name)
                    else:
                        raise
if relative_object_filename:
os.symlink(relative_object_filename, link_name)
else:
os.symlink(object_filename, link_name)
for index in cls._hashed_indexes or []:
if index not in indexes:
continue
if not hasattr(object, index) or getattr(object, index) is None:
continue
attribute = getattr(object, index)
if type(attribute) is dict:
attribute = attribute.values()
elif type(attribute) not in (tuple, list, set):
attribute = [attribute]
for attr in attribute:
attr_value = fix_key(attr)
index_name = '%s-%s' % (index, attr_value)
                    if index_name not in hashed_indexes:
                        hashed_indexes[index_name] = []
                    hashed_indexes[index_name].append(object.id)
for index, content in hashed_indexes.items():
index_key = index.split('-')[0]
if index_key not in indexes:
continue
index_file = os.path.join(objects_dir, '.indexes', index_key, index)
pickle.dump(content, file(index_file, 'w'))
for index in cls._hashed_indexes or []:
if index not in indexes:
continue
index_dir = os.path.join(objects_dir, '.indexes', index)
for filename in os.listdir(index_dir):
if filename not in hashed_indexes:
os.unlink(os.path.join(index_dir, filename))
rebuild_indexes = classmethod(rebuild_indexes)
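    # On-disk layout sketch (hypothetical class, for illustration): with
    # _names = 'foo', _indexes = ['slug'] and _hashed_indexes = ['category'],
    # rebuild_indexes() maintains:
    #   <app_dir>/foo/<id>                        the pickled objects
    #   <app_dir>/foo-slug/<value> -> ../foo/<id> one symlink per object
    #   <app_dir>/foo/.indexes/category/category-<value>  pickled id lists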
def get_object_filename(self):
if self._filename:
if self._filename[0] == '/':
return self._filename
else:
return os.path.join(get_publisher().app_dir, self._filename)
else:
objects_dir = self.get_objects_dir()
return os.path.join(objects_dir, fix_key(self.id))
def storage_dumps(cls, object):
return pickle.dumps(object)
storage_dumps = classmethod(storage_dumps)
def store(self, async=False):
objects_dir = self.get_objects_dir()
new_object = False
if self._filename:
if self._filename[0] == '/':
object_filename = self._filename
relative_object_filename = None
else:
object_filename = os.path.join(get_publisher().app_dir, self._filename)
relative_object_filename = os.path.join('..', self._filename)
else:
if not os.path.exists(objects_dir):
try:
os.mkdir(objects_dir)
except OSError, error:
if error.errno != 17: # 17 == Directory exists
raise
if self.id is None:
self.id = self.get_new_id(create=True)
new_object = True
object_filename = os.path.join(objects_dir, fix_key(self.id))
relative_object_filename = os.path.join('..', self.get_table_name(), fix_key(self.id))
previous_object_value = None
if not new_object and (self._indexes or self._hashed_indexes):
previous_object_value = self.get_filename(object_filename,
ignore_errors=True, ignore_migration=True)
s = self.storage_dumps(self)
atomic_write(object_filename, s, async)
# update last modified time
if os.path.exists(objects_dir):
os.utime(objects_dir, None)
try:
self.update_indexes(previous_object_value, relative_object_filename)
except:
# something failed, we can't keep using possibly broken indexes, so
# we notify of the bug and remove the indexes
get_publisher().notify_of_exception(sys.exc_info(), context='[STORAGE]')
self.destroy_indexes()
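    # Usage sketch (hypothetical subclass, for illustration), assuming a
    # publisher and its app_dir are set up:
    #   class Song(StorableObject):
    #       _names = 'songs'
    #       _hashed_indexes = ['artist']
    #   song = Song()
    #   song.artist = 'someone'
    #   song.store()         # allocates an id, writes the pickle atomically,
    #                        # then refreshes the indexes
    #   same = Song.get(song.id)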
    def destroy_indexes(cls):
        # move the index directories aside first (a cheap, atomic rename) so
        # possibly broken indexes stop being used at once, then wipe them
        objects_dir = cls.get_objects_dir()
directories_to_trash = []
directories_to_wipe = []
for index in cls._indexes or []:
index_dir = objects_dir + '-' + index
directories_to_trash.append(index_dir)
directories_to_trash.append(os.path.join(objects_dir, '.indexes'))
for directory in directories_to_trash:
if not os.path.exists(directory):
continue
i = 0
while True:
trashed_index_name = directory + '.trash-%s' % i
i += 1
try:
os.mkdir(trashed_index_name)
except OSError:
continue
try:
os.rename(directory, os.path.join(trashed_index_name, 'idx'))
except OSError:
continue
directories_to_wipe.append(trashed_index_name)
break
for directory in directories_to_wipe:
shutil.rmtree(directory)
destroy_indexes = classmethod(destroy_indexes)
def update_indexes(self, previous_object_value, relative_object_filename):
objects_dir = self.get_objects_dir()
rebuilt_indexes = False
for index in self._indexes or []:
if not hasattr(self, index) or getattr(self, index) is None:
continue
index_dir = objects_dir + '-' + index
link_name = os.path.join(index_dir, fix_key(str(getattr(self, index))))
if previous_object_value:
old_link_name = os.path.join(index_dir,
fix_key(str(getattr(previous_object_value, index))))
if os.path.exists(old_link_name):
if old_link_name == link_name:
continue
os.unlink(old_link_name)
try:
if relative_object_filename:
os.symlink(relative_object_filename, link_name)
else:
os.symlink(self.get_object_filename(), link_name)
        except OSError, exc:
            if exc.errno == 2:  # ENOENT: the index dir is missing
                os.mkdir(index_dir)
                if not rebuilt_indexes:
                    # perhaps index dir got removed; rebuild it before
                    # adding elements to it.
                    self.rebuild_indexes()
                    rebuilt_indexes = True
            elif exc.errno == 17:  # EEXIST: a stale link exists
                os.unlink(link_name)
            else:
                raise
if not rebuilt_indexes:
if relative_object_filename:
os.symlink(relative_object_filename, link_name)
else:
os.symlink(self.get_object_filename(), link_name)
for index in self._hashed_indexes or []:
index_dir = os.path.join(objects_dir, '.indexes', index)
if not os.path.exists(index_dir):
os.makedirs(index_dir)
old_value = []
if type(getattr(self, index)) is dict:
new_value = getattr(self, index).values()
if previous_object_value:
old_value = getattr(previous_object_value, index)
if old_value is None:
old_value = []
else:
old_value = old_value.values()
elif type(getattr(self, index)) in (tuple, list, set):
new_value = getattr(self, index)
if previous_object_value:
old_value = getattr(previous_object_value, index)
else:
new_value = [getattr(self, index)]
if previous_object_value:
old_raw_value = getattr(previous_object_value, index)
if type(old_raw_value) is dict:
old_value = old_raw_value.values()
elif type(old_raw_value) in (tuple, list, set):
old_value = old_raw_value
else:
old_value = [old_raw_value]
for oldv in old_value:
if oldv in new_value:
continue
old_index_name = '%s-%s' % (index, fix_key(oldv))
old_index_file = os.path.join(index_dir, old_index_name)
if os.path.exists(old_index_file):
ids = pickle.load(file(old_index_file))
if self.id in ids:
ids.remove(self.id)
pickle.dump(ids, file(old_index_file, 'w'))
for newv in new_value:
if newv in old_value:
continue
index_name = '%s-%s' % (index, fix_key(newv))
index_file = os.path.join(index_dir, index_name)
if os.path.exists(index_file):
ids = pickle.load(file(index_file))
else:
ids = []
                if self.id not in ids:
                    ids.append(self.id)
                pickle.dump(ids, file(index_file, 'w'))
    def volatile(cls):
        # return an instance that is not attached to storage (no id)
        o = cls()
        o.id = None
        return o
volatile = classmethod(volatile)
def remove_object(cls, id):
objects_dir = cls.get_objects_dir()
if cls._indexes or cls._hashed_indexes:
object = cls.get(id)
for index in cls._indexes or []:
if not hasattr(object, index) or getattr(object, index) is None:
continue
index_dir = objects_dir + '-' + index
link_name = os.path.join(index_dir, fix_key(str(getattr(object, index))))
try:
os.unlink(link_name)
except OSError:
pass
index_dir = os.path.join(objects_dir, '.indexes')
for index in cls._hashed_indexes or []:
                attribute = getattr(object, index, None)
                if attribute is None:
                    continue
                if type(attribute) is dict:
                    attribute = attribute.values()
                elif type(attribute) not in (tuple, list, set):
                    attribute = [attribute]
for attr in attribute:
attr_value = fix_key(attr)
index_name = '%s-%s' % (index, attr_value)
index_file = os.path.join(index_dir, index, index_name)
if os.path.exists(index_file):
ids = pickle.load(file(index_file))
if object.id in ids:
ids.remove(object.id)
pickle.dump(ids, file(index_file, 'w'))
os.unlink(os.path.join(objects_dir, fix_key(id)))
remove_object = classmethod(remove_object)
def remove_self(self):
self.remove_object(self.id)
def wipe(cls):
tmpdir = tempfile.mkdtemp(prefix='wiping', dir=os.path.join(get_publisher().app_dir))
dirs_to_move = []
objects_dir = cls.get_objects_dir()
dirs_to_move.append(objects_dir)
for index in cls._indexes or []:
index_dir = objects_dir + '-' + index
dirs_to_move.append(index_dir)
for directory in dirs_to_move:
if os.path.exists(directory):
os.rename(directory, os.path.join(tmpdir, os.path.basename(directory)))
shutil.rmtree(tmpdir)
wipe = classmethod(wipe)
def __repr__(self):
if hasattr(self, 'display_name'):
display_name = '%r ' % self.display_name
elif hasattr(self, 'get_display_name'):
display_name = '%r ' % self.get_display_name()
elif hasattr(self, 'name'):
display_name = '%r ' % self.name
else:
display_name = ''
return '<%s %sid:%s>' % (self.__class__.__name__, display_name, self.id)