wcs/wcs/qommon/storage.py

552 lines
20 KiB
Python

# w.c.s. - web application for online forms
# Copyright (C) 2005-2010 Entr'ouvert
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
import errno
import os
import os.path
import pickle
import shutil
import tempfile
import time

try:
    import thread as _thread
except ImportError:
    import dummy_thread as _thread

from quixote import get_publisher
def _take(l, x):
for y in l:
if x:
x -= 1
else:
break
yield y
def lax_int(s):
    '''Convert s with int(); return -1 when int() raises ValueError.'''
    try:
        value = int(s)
    except ValueError:
        value = -1
    return value
def fix_key(k):
    '''Return k as a string that can be used as a file name.

    Every "/" is replaced by "-".  Falsy keys (None, '', 0) are handed back
    untouched, without being converted to a string.
    '''
    if k:
        return str(k).replace('/', '-')
    return k
def atomic_write(path, content, async=False):
'''Rewrite a complete file automatically, that is write to new file with
temporary name, fsync, then rename to final name. Use threads to remove blocking.'''
def doit():
dirname = os.path.dirname(path)
fd, temp = tempfile.mkstemp(dir=dirname,
prefix='.tmp-'+os.path.basename(path)+'-')
f = os.fdopen(fd, "w")
if hasattr(content, 'read'):
# file pointer
def read100k():
return content.read(100000)
for piece in iter(read100k, ''):
f.write(piece)
else:
f.write(content)
f.flush()
os.fsync(f.fileno())
f.close()
os.rename(temp, path)
if async:
_thread.start_new_thread(doit, ())
else:
doit()
class StorageIndexException(Exception):
    '''Error raised by the storage code when an index cannot be used.'''
class StorableObject(object):
_indexes = None
_hashed_indexes = None
_filename = None # None, unless must be saved to a specific location
def __init__(self, id = None):
self.id = id
if get_publisher() and not self.id:
self.id = self.get_new_id()
def get_table_name(cls):
return cls._names
get_table_name = classmethod(get_table_name)
def get_objects_dir(cls):
return os.path.join(get_publisher().app_dir, cls.get_table_name())
get_objects_dir = classmethod(get_objects_dir)
def keys(cls):
if not os.path.exists(cls.get_objects_dir()):
return []
return [fix_key(x) for x in os.listdir(cls.get_objects_dir()) if x[0] != '.']
keys = classmethod(keys)
def values(cls, ignore_errors = False):
values = [cls.get(x, ignore_errors = ignore_errors) for x in cls.keys()]
return [x for x in values if x is not None]
values = classmethod(values)
def items(cls):
return [(x, cls.get(x)) for x in cls.keys()]
items = classmethod(items)
def count(cls):
return len(cls.keys())
count = classmethod(count)
def select(cls, clause = None, order_by = None, ignore_errors = False, limit = None):
keys = cls.keys()
objects = (cls.get(k, ignore_errors = ignore_errors) for k in keys)
if ignore_errors:
objects = (x for x in objects if x is not None)
if clause:
objects = (x for x in objects if clause(x))
if order_by:
order_by = str(order_by)
if order_by[0] == '-':
reverse = True
order_by = order_by[1:]
else:
reverse = False
# only list can be sorted
objects = list(objects)
objects.sort(lambda x,y: cmp(getattr(x, order_by), getattr(y, order_by)))
if reverse:
objects.reverse()
if limit:
objects = _take(objects, limit)
return list(objects)
select = classmethod(select)
def has_key(cls, id):
filename = os.path.join(cls.get_objects_dir(), fix_key(id))
return os.path.exists(filename)
has_key = classmethod(has_key)
def get_new_id(cls, create=False):
keys = cls.keys()
if not keys:
id = 1
else:
id = max([lax_int(x) for x in keys]) + 1
if id == 0:
id = len(keys)+1
if create:
objects_dir = cls.get_objects_dir()
object_filename = os.path.join(objects_dir, fix_key(id))
try:
fd = os.open(object_filename, os.O_CREAT | os.O_EXCL)
except OSError:
return cls.get_new_id(create=True)
os.close(fd)
return id
get_new_id = classmethod(get_new_id)
def get(cls, id, ignore_errors=False, ignore_migration=False):
if id is None:
if ignore_errors:
return None
else:
raise KeyError()
filename = os.path.join(cls.get_objects_dir(), fix_key(id))
return cls.get_filename(filename, ignore_errors=ignore_errors,
ignore_migration=ignore_migration)
get = classmethod(get)
def get_on_index(cls, id, index, ignore_errors=False, ignore_migration=False):
if not cls._indexes:
raise KeyError()
objects_dir = cls.get_objects_dir()
index_dir = objects_dir + '-' + index
if not os.path.exists(index_dir):
cls.rebuild_indexes()
filename = os.path.join(index_dir, str(fix_key(id)))
return cls.get_filename(filename, ignore_errors=ignore_errors,
ignore_migration=ignore_migration)
get_on_index = classmethod(get_on_index)
def get_ids_with_indexed_value(cls, index, value, auto_fallback=True):
objects_dir = cls.get_objects_dir()
index_dir = os.path.join(objects_dir, '.indexes', str(index))
index_file = os.path.join(index_dir, '%s-%s' % (index, fix_key(value)))
if not os.path.exists(index_dir):
if auto_fallback is False:
raise StorageIndexException()
try:
cls.rebuild_indexes()
except StorageIndexException:
values = cls.select(ignore_errors=True)
return [x for x in values if getattr(x, index) == value]
if not os.path.exists(index_file):
return []
return pickle.load(file(index_file))
get_ids_with_indexed_value = classmethod(get_ids_with_indexed_value)
def get_with_indexed_value(cls, index, value, ignore_errors = False):
ids = cls.get_ids_with_indexed_value(str(index), str(value))
objects = []
for x in ids:
obj = cls.get(x, ignore_errors = ignore_errors)
if obj is not None:
objects.append(obj)
return objects
get_with_indexed_value = classmethod(get_with_indexed_value)
def get_filename(cls, filename, ignore_errors=False, ignore_migration=False):
if get_publisher() and get_publisher().unpickler_class:
unpickler = get_publisher().unpickler_class
else:
unpickler = pickle.Unpickler
try:
o = unpickler(file(filename)).load()
except IOError:
if ignore_errors:
return None
raise KeyError()
except (EOFError, ImportError), e:
if ignore_errors:
return None
raise KeyError()
o.__class__ = cls
if not ignore_migration and hasattr(cls, 'migrate'):
o.migrate()
return o
get_filename = classmethod(get_filename)
def rebuild_indexes(cls):
if not (cls._indexes or cls._hashed_indexes):
return
objects_dir = cls.get_objects_dir()
hashed_indexes = {}
for index in cls._hashed_indexes or []:
index_dir = os.path.join(objects_dir, '.indexes', index)
if not os.path.exists(index_dir):
try:
os.makedirs(index_dir)
except OSError:
raise StorageIndexException()
for object in cls.values(ignore_errors = True):
object_filename = os.path.join(objects_dir, fix_key(object.id))
relative_object_filename = os.path.join('..', cls.get_table_name(), fix_key(object.id))
for index in cls._indexes or []:
if not hasattr(object, index) or getattr(object, index) is None:
continue
index_dir = objects_dir + '-' + index
link_name = os.path.join(index_dir, fix_key(str(getattr(object, index))))
try:
if relative_object_filename:
os.symlink(relative_object_filename, link_name)
else:
os.symlink(object_filename, link_name)
except OSError, exc:
if exc.errno == 2:
os.mkdir(index_dir)
elif exc.errno == 17:
os.unlink(link_name)
else:
raise
if relative_object_filename:
os.symlink(relative_object_filename, link_name)
else:
os.symlink(object_filename, link_name)
for index in cls._hashed_indexes or []:
if not hasattr(object, index) or getattr(object, index) is None:
continue
attribute = getattr(object, index)
if type(attribute) not in (tuple, list):
attribute = [attribute]
for attr in attribute:
attr_value = fix_key(attr)
index_name = '%s-%s' % (index, attr_value)
if not index_name in hashed_indexes:
hashed_indexes[index_name] = []
hashed_indexes[index_name].append(object.id)
for index, content in hashed_indexes.items():
index_key = index.split('-')[0]
index_file = os.path.join(objects_dir, '.indexes', index_key, index)
pickle.dump(content, file(index_file, 'w'))
rebuild_indexes = classmethod(rebuild_indexes)
def get_object_filename(self):
if self._filename:
if self._filename[0] == '/':
return self._filename
else:
return os.path.join(get_publisher().app_dir, self._filename)
else:
objects_dir = self.get_objects_dir()
return os.path.join(objects_dir, fix_key(self.id))
def store(self, async=False):
objects_dir = self.get_objects_dir()
new_object = False
if self._filename:
if self._filename[0] == '/':
object_filename = self._filename
relative_object_filename = None
else:
object_filename = os.path.join(get_publisher().app_dir, self._filename)
relative_object_filename = os.path.join('..', self._filename)
else:
if not os.path.exists(objects_dir):
try:
os.mkdir(objects_dir)
except OSError, error:
if error.errno != 17: # 17 == Directory exists
raise
pass
if self.id is None:
self.id = self.get_new_id(create=True)
new_object = True
object_filename = os.path.join(objects_dir, fix_key(self.id))
relative_object_filename = os.path.join('..', self.get_table_name(), fix_key(self.id))
previous_object_value = None
if not new_object and (self._indexes or self._hashed_indexes):
previous_object_value = self.get_filename(object_filename,
ignore_errors=True, ignore_migration=True)
s = pickle.dumps(self)
atomic_write(object_filename, s, async)
# update last modified time
if os.path.exists(objects_dir):
os.utime(objects_dir, None)
try:
self.update_indexes(previous_object_value, relative_object_filename)
except:
# something failed, we can't keep using possibly broken indexes, we
# therefore remove them
self.destroy_indexes()
def destroy_indexes(cls):
objects_dir = cls.get_objects_dir()
directories_to_trash = []
directories_to_wipe = []
for index in cls._indexes or []:
index_dir = objects_dir + '-' + index
directories_to_trash.append(index_dir)
directories_to_trash.append(os.path.join(objects_dir, '.indexes'))
for directory in directories_to_trash:
if not os.path.exists(directory):
continue
i = 0
while True:
trashed_index_name = directory + '.trash-%s' % i
i += 1
try:
os.mkdir(trashed_index_name)
except OSError:
continue
try:
os.rename(directory, os.path.join(trashed_index_name, 'idx'))
except OSError:
continue
directories_to_wipe.append(trashed_index_name)
break
for directory in directories_to_wipe:
shutil.rmtree(directory)
destroy_indexes = classmethod(destroy_indexes)
def update_indexes(self, previous_object_value, relative_object_filename):
objects_dir = self.get_objects_dir()
rebuilt_indexes = False
for index in self._indexes or []:
if not hasattr(self, index) or getattr(self, index) is None:
continue
index_dir = objects_dir + '-' + index
link_name = os.path.join(index_dir, fix_key(str(getattr(self, index))))
if previous_object_value:
old_link_name = os.path.join(index_dir,
fix_key(str(getattr(previous_object_value, index))))
if os.path.exists(old_link_name):
if old_link_name == link_name:
continue
os.unlink(old_link_name)
try:
if relative_object_filename:
os.symlink(relative_object_filename, link_name)
else:
os.symlink(self.get_object_filename(), link_name)
except OSError, exc:
if exc.errno == 2:
os.mkdir(index_dir)
if not rebuilt_indexes:
# perhaps index dir got removed; rebuild it before
# adding elements to it.
self.rebuild_indexes()
rebuilt_indexes = True
elif exc.errno == 17:
os.unlink(link_name)
else:
raise
if not rebuilt_indexes:
if relative_object_filename:
os.symlink(relative_object_filename, link_name)
else:
os.symlink(self.get_object_filename(), link_name)
for index in self._hashed_indexes or []:
index_dir = os.path.join(objects_dir, '.indexes', index)
if not os.path.exists(index_dir):
os.makedirs(index_dir)
old_value = []
if type(getattr(self, index)) in (tuple, list):
new_value = getattr(self, index)
if previous_object_value:
old_value = getattr(previous_object_value, index)
else:
new_value = [getattr(self, index)]
if previous_object_value:
old_value = [getattr(previous_object_value, index)]
for oldv in old_value:
if oldv in new_value:
continue
old_index_name = '%s-%s' % (index, fix_key(oldv))
old_index_file = os.path.join(index_dir, old_index_name)
if os.path.exists(old_index_file):
ids = pickle.load(file(old_index_file))
if self.id in ids:
ids.remove(self.id)
pickle.dump(ids, file(old_index_file, 'w'))
for newv in new_value:
if newv in old_value:
continue
index_name = '%s-%s' % (index, fix_key(newv))
index_file = os.path.join(index_dir, index_name)
if os.path.exists(index_file):
ids = pickle.load(file(index_file))
else:
ids = []
if not self.id in ids:
ids.append(self.id)
pickle.dump(ids, file(index_file, 'w'))
def volatile(cls):
o = cls()
o.id = None
return o
volatile = classmethod(volatile)
def remove_object(cls, id):
objects_dir = cls.get_objects_dir()
if cls._indexes or cls._hashed_indexes:
object = cls.get(id)
for index in cls._indexes or []:
if not hasattr(object, index) or getattr(object, index) is None:
continue
index_dir = objects_dir + '-' + index
link_name = os.path.join(index_dir, fix_key(str(getattr(object, index))))
try:
os.unlink(link_name)
except OSError:
pass
index_dir = os.path.join(objects_dir, '.indexes')
for index in cls._hashed_indexes or []:
attribute = getattr(object, index)
if type(attribute) not in (tuple, list):
attribute = [attribute]
for attr in attribute:
attr_value = fix_key(attr)
index_name = '%s-%s' % (index, attr_value)
index_file = os.path.join(index_dir, index, index_name)
if os.path.exists(index_file):
ids = pickle.load(file(index_file))
if object.id in ids:
ids.remove(object.id)
pickle.dump(ids, file(index_file, 'w'))
os.unlink(os.path.join(objects_dir, fix_key(id)))
remove_object = classmethod(remove_object)
def remove_self(self):
self.remove_object(self.id)
def last_modified_id(cls, id):
filename = os.path.join(cls.get_objects_dir(), fix_key(id))
mtime = 0
try:
stat = os.stat(filename)
mtime = stat.st_mtime
except OSError:
mtime = int(time.time())
return mtime
last_modified_id = classmethod(last_modified_id)
def last_modified(cls):
mtime = 0
try:
stat = os.stat(cls.get_objects_dir())
mtime = stat.st_mtime
except OSError:
mtime = int(time.time())
return mtime
last_modified = classmethod(last_modified)
def wipe(cls):
tmpdir = tempfile.mkdtemp(prefix='wiping', dir=os.path.join(get_publisher().app_dir))
dirs_to_move = []
objects_dir = cls.get_objects_dir()
dirs_to_move.append(objects_dir)
for index in cls._indexes or []:
index_dir = objects_dir + '-' + index
dirs_to_move.append(index_dir)
for directory in dirs_to_move:
if os.path.exists(directory):
os.rename(directory, os.path.join(tmpdir, os.path.basename(directory)))
shutil.rmtree(tmpdir)
wipe = classmethod(wipe)
def __repr__(self):
if hasattr(self, 'display_name'):
display_name = '%r ' % self.display_name
elif hasattr(self, 'get_display_name'):
display_name = '%r ' % self.get_display_name()
elif hasattr(self, 'name'):
display_name = '%r ' % self.name
else:
display_name = ''
return '<%s %sid:%s>' % (self.__class__.__name__, display_name, self.id)