# w.c.s. - web application for online forms
# Copyright (C) 2005-2010 Entr'ouvert
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
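
'''Pickle-based storage backend: each object is stored in its own file,
named after its id, in a per-class directory; optional indexes are
maintained as directories of symlinks (_indexes) and as pickled lists
of ids (_hashed_indexes).'''
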
import operator
import os
import time
import pickle
import os.path
import shutil
import sys
import tempfile
try:
import thread as _thread
except ImportError:
import dummy_thread as _thread
from quixote import get_publisher
def _take(objects, limit, offset=0):
    # yield at most `limit` objects, skipping the first `offset` ones;
    # a limit of None means no limit
    for y in objects:
        if offset:
            offset -= 1
            continue
        if limit == 0:
            break
        if limit:
            limit -= 1
        yield y
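# Example: list(_take(iter('abcdef'), limit=2, offset=1)) == ['b', 'c']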
def lax_int(s):
    # convert to int, mapping non-numeric values to -1
    try:
        return int(s)
    except ValueError:
        return -1
def fix_key(k):
    # ensure the key can be used as a filename
    if not k:
        return k
    return str(k).replace('/', '-')
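# Example: fix_key('foo/bar') == 'foo-bar', fix_key(42) == '42'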
def atomic_write(path, content, async=False):
    '''Atomically rewrite a complete file: write the content to a file with
    a temporary name in the same directory, fsync it, then rename it to its
    final name.  Use a thread to avoid blocking when async is True.'''
def doit():
dirname = os.path.dirname(path)
fd, temp = tempfile.mkstemp(dir=dirname,
prefix='.tmp-'+os.path.basename(path)+'-')
f = os.fdopen(fd, "w")
if hasattr(content, 'read'):
# file pointer
def read100k():
return content.read(100000)
for piece in iter(read100k, ''):
f.write(piece)
else:
f.write(content)
f.flush()
os.fsync(f.fileno())
f.close()
os.rename(temp, path)
if async:
_thread.start_new_thread(doit, ())
else:
doit()
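# Usage sketch (hypothetical path, for illustration):
#   atomic_write('/tmp/example.txt', 'some content')
# A concurrent reader sees either the previous content or the new one,
# never a partially written file, since rename() is atomic on POSIX.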
class Criteria(object):
    # base class for filter criteria; subclasses set `op` to a binary
    # operator, applied as op(getattr(object, attribute), value)
    def __init__(self, attribute, value):
        self.attribute = attribute
        self.value = value

    def build_lambda(self):
        return lambda x: self.op(getattr(x, self.attribute), self.value)
class Less(Criteria):
op = operator.lt
class Greater(Criteria):
op = operator.gt
class Equal(Criteria):
op = operator.eq
class NotEqual(Criteria):
op = operator.ne
class LessOrEqual(Criteria):
op = operator.le
class GreaterOrEqual(Criteria):
op = operator.ge
class Contains(Criteria):
    # true when the attribute value is in the given collection (SQL "IN")
    op = operator.contains

    def build_lambda(self):
        return lambda x: self.op(self.value, getattr(x, self.attribute))

class NotContains(Criteria):
    # true when the attribute value is not in the given collection
    op = operator.contains

    def build_lambda(self):
        return lambda x: not self.op(self.value, getattr(x, self.attribute))
class Intersects(Criteria):
    # true when the attribute value (a sequence) shares at least one
    # element with the given sequence
    def build_lambda(self):
        value = set(self.value)
        return lambda x: bool(value.intersection(getattr(x, self.attribute) or []))
class Or(Criteria):
def __init__(self, criterias):
self.criterias = criterias
def build_lambda(self):
func = lambda x: False
def combine_callables(x1, x2):
return lambda x: x1(x) or x2(x)
for element in self.criterias:
func = combine_callables(func, element.build_lambda())
return func
class And(Criteria):
def __init__(self, criterias):
self.criterias = criterias
def build_lambda(self):
func = lambda x: True
def combine_callables(x1, x2):
return lambda x: x1(x) and x2(x)
for element in self.criterias:
func = combine_callables(func, element.build_lambda())
return func
class ILike(Criteria):
    # case-insensitive substring match (like SQL "ILIKE '%value%'")
    def build_lambda(self):
        return lambda x: self.value.lower() in (getattr(x, self.attribute) or '').lower()
def parse_clause(clause):
    # build a single callable out of a clause: either a callable already,
    # or a list of Criteria instances and/or callables that must all
    # match (logical AND)
if callable(clause): # already a callable
return clause
def combine_callables(x1, x2):
return lambda x: x1(x) and x2(x)
func = lambda x: True
for element in clause:
if callable(element):
func = combine_callables(func, element)
else:
func = combine_callables(func, element.build_lambda())
return func
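# Example (hypothetical attribute names, for illustration):
#   clause = [Equal('status', 'open'),
#             Or([ILike('title', 'report'), Intersects('tags', ['urgent'])])]
#   match = parse_clause(clause)
#   match(obj)  # True when obj.status == 'open' and either inner
#               # criterion matches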
class StorageIndexException(Exception):
pass
class StorableObject(object):
    _names = None  # storage directory name; must be set in subclasses
    _indexes = None
    _hashed_indexes = None
    _filename = None # None, unless must be saved to a specific location

    def __init__(self, id=None):
self.id = id
if get_publisher() and not self.id:
self.id = self.get_new_id()
def get_table_name(cls):
return cls._names
get_table_name = classmethod(get_table_name)
def get_objects_dir(cls):
return os.path.join(get_publisher().app_dir, cls.get_table_name())
get_objects_dir = classmethod(get_objects_dir)
def keys(cls):
if not os.path.exists(cls.get_objects_dir()):
return []
return [fix_key(x) for x in os.listdir(cls.get_objects_dir()) if x[0] != '.']
keys = classmethod(keys)
    def values(cls, ignore_errors=False, ignore_migration=True):
        values = [cls.get(x, ignore_errors=ignore_errors,
                          ignore_migration=ignore_migration) for x in cls.keys()]
        return [x for x in values if x is not None]
values = classmethod(values)
def items(cls):
return [(x, cls.get(x)) for x in cls.keys()]
items = classmethod(items)
def count(cls, clause=None):
if clause:
return len(cls.select(clause))
return len(cls.keys())
count = classmethod(count)
def select(cls, clause=None, order_by=None, ignore_errors=False, limit=None, offset=None):
keys = cls.keys()
        objects = (cls.get(k, ignore_errors=ignore_errors) for k in keys)
if ignore_errors:
objects = (x for x in objects if x is not None)
if clause:
clause_function = parse_clause(clause)
objects = (x for x in objects if clause_function(x))
if order_by:
order_by = str(order_by)
if order_by[0] == '-':
reverse = True
order_by = order_by[1:]
else:
reverse = False
# only list can be sorted
objects = list(objects)
if order_by == 'id':
cmp_function = lambda x, y: cmp(lax_int(x.id), lax_int(y.id))
else:
cmp_function = lambda x, y: cmp(getattr(x, order_by), getattr(y, order_by))
objects.sort(cmp_function)
if reverse:
objects.reverse()
if limit or offset:
objects = _take(objects, limit, offset)
return list(objects)
select = classmethod(select)
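    # Usage sketch (hypothetical subclass and attributes, for illustration):
    #   SomeClass.select([NotEqual('status', 'draft')],
    #                    order_by='-receipt_time', limit=10)
    # loads every stored object, filters with the clause, sorts on the
    # given attribute (descending, because of the leading '-'), then
    # keeps the first ten results.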
def has_key(cls, id):
filename = os.path.join(cls.get_objects_dir(), fix_key(id))
return os.path.exists(filename)
has_key = classmethod(has_key)
    def get_new_id(cls, create=False):
        keys = cls.keys()
        if not keys:
            id = 1
        else:
            id = max([lax_int(x) for x in keys]) + 1
            if id == 0:
                # all existing keys were non-numeric (lax_int() gave -1)
                id = len(keys) + 1
        if create:
            # reserve the id by creating its file exclusively; on a
            # collision with a concurrent creation, try again
            objects_dir = cls.get_objects_dir()
            object_filename = os.path.join(objects_dir, fix_key(id))
            try:
                fd = os.open(object_filename, os.O_CREAT | os.O_EXCL)
            except OSError:
                return cls.get_new_id(create=True)
            os.close(fd)
        return str(id)
get_new_id = classmethod(get_new_id)
def get(cls, id, ignore_errors=False, ignore_migration=False):
if id is None:
if ignore_errors:
return None
else:
raise KeyError()
filename = os.path.join(cls.get_objects_dir(), fix_key(id))
return cls.get_filename(filename, ignore_errors=ignore_errors,
ignore_migration=ignore_migration)
get = classmethod(get)
    def get_ids(cls, ids, ignore_errors=False, keep_order=False):
        # note: results always follow the order of `ids`; the keep_order
        # parameter is unused here
        objects = []
        for x in ids:
            obj = cls.get(x, ignore_errors=ignore_errors)
            if obj is not None:
                objects.append(obj)
        return objects
get_ids = classmethod(get_ids)
def get_on_index(cls, id, index, ignore_errors=False, ignore_migration=False):
if not cls._indexes:
raise KeyError()
objects_dir = cls.get_objects_dir()
index_dir = objects_dir + '-' + index
if not os.path.exists(index_dir):
cls.rebuild_indexes()
filename = os.path.join(index_dir, str(fix_key(id)))
return cls.get_filename(filename, ignore_errors=ignore_errors,
ignore_migration=ignore_migration)
get_on_index = classmethod(get_on_index)
def get_ids_with_indexed_value(cls, index, value, auto_fallback=True):
objects_dir = cls.get_objects_dir()
index_dir = os.path.join(objects_dir, '.indexes', str(index))
index_file = os.path.join(index_dir, '%s-%s' % (index, fix_key(value)))
if not os.path.exists(index_dir):
if auto_fallback is False:
raise StorageIndexException()
try:
cls.rebuild_indexes()
except StorageIndexException:
values = cls.select(ignore_errors=True)
return [x for x in values if getattr(x, index) == value]
if not os.path.exists(index_file):
return []
return pickle.load(file(index_file))
get_ids_with_indexed_value = classmethod(get_ids_with_indexed_value)
    def get_with_indexed_value(cls, index, value, ignore_errors=False):
        ids = cls.get_ids_with_indexed_value(str(index), str(value))
        return cls.get_ids(ids, ignore_errors=ignore_errors)
get_with_indexed_value = classmethod(get_with_indexed_value)
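    # Usage sketch (for illustration), assuming a subclass declaring
    # _hashed_indexes = ['user_id']:
    #   objects = SomeClass.get_with_indexed_value('user_id', '42')
    # returns the objects whose user_id is '42', going through the pickled
    # id lists in <objects_dir>/.indexes/user_id/, falling back to a full
    # scan if the index cannot be built.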
def storage_load(cls, fd):
if get_publisher() and get_publisher().unpickler_class:
unpickler = get_publisher().unpickler_class
else:
unpickler = pickle.Unpickler
return unpickler(fd).load()
storage_load = classmethod(storage_load)
def get_filename(cls, filename, ignore_errors=False, ignore_migration=False):
try:
o = cls.storage_load(file(filename))
except IOError:
if ignore_errors:
return None
raise KeyError()
        except (EOFError, ImportError):
if ignore_errors:
return None
raise KeyError()
o.__class__ = cls
if not ignore_migration:
o.id = str(o.id) # makes sure 'id' is a string
if hasattr(cls, 'migrate'):
o.migrate()
return o
get_filename = classmethod(get_filename)
    def rebuild_indexes(cls, indexes=None):
if not (cls._indexes or cls._hashed_indexes):
return
if not indexes:
indexes = (cls._hashed_indexes or []) + (cls._indexes or [])
objects_dir = cls.get_objects_dir()
hashed_indexes = {}
for index in cls._hashed_indexes or []:
if index not in indexes:
continue
index_dir = os.path.join(objects_dir, '.indexes', index)
if not os.path.exists(index_dir):
try:
os.makedirs(index_dir)
except OSError:
raise StorageIndexException()
for object in cls.values(ignore_errors=True, ignore_migration=True):
object_filename = os.path.join(objects_dir, fix_key(object.id))
relative_object_filename = os.path.join('..', cls.get_table_name(), fix_key(object.id))
for index in cls._indexes or []:
if index not in indexes:
continue
if not hasattr(object, index) or getattr(object, index) is None:
continue
index_dir = objects_dir + '-' + index
link_name = os.path.join(index_dir, fix_key(str(getattr(object, index))))
try:
if relative_object_filename:
os.symlink(relative_object_filename, link_name)
else:
os.symlink(object_filename, link_name)
                except OSError, exc:
                    if exc.errno == 2:  # ENOENT: the index dir is missing
                        os.mkdir(index_dir)
                    elif exc.errno == 17:  # EEXIST: a stale link exists
                        os.unlink(link_name)
                    else:
                        raise
if relative_object_filename:
os.symlink(relative_object_filename, link_name)
else:
os.symlink(object_filename, link_name)
for index in cls._hashed_indexes or []:
if index not in indexes:
continue
if not hasattr(object, index) or getattr(object, index) is None:
continue
attribute = getattr(object, index)
if type(attribute) is dict:
attribute = attribute.values()
elif type(attribute) not in (tuple, list, set):
attribute = [attribute]
for attr in attribute:
attr_value = fix_key(attr)
index_name = '%s-%s' % (index, attr_value)
                    if index_name not in hashed_indexes:
                        hashed_indexes[index_name] = []
                    hashed_indexes[index_name].append(object.id)
for index, content in hashed_indexes.items():
index_key = index.split('-')[0]
if index_key not in indexes:
continue
index_file = os.path.join(objects_dir, '.indexes', index_key, index)
pickle.dump(content, file(index_file, 'w'))
for index in cls._hashed_indexes or []:
if index not in indexes:
continue
index_dir = os.path.join(objects_dir, '.indexes', index)
for filename in os.listdir(index_dir):
if filename not in hashed_indexes:
os.unlink(os.path.join(index_dir, filename))
rebuild_indexes = classmethod(rebuild_indexes)
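    # On-disk layout sketch (hypothetical class, for illustration): with
    # _names = 'foo', _indexes = ['slug'] and _hashed_indexes = ['category'],
    # rebuild_indexes() maintains:
    #   <app_dir>/foo/<id>                        the pickled objects
    #   <app_dir>/foo-slug/<value> -> ../foo/<id> one symlink per object
    #   <app_dir>/foo/.indexes/category/category-<value>  pickled id lists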
def get_object_filename(self):
if self._filename:
if self._filename[0] == '/':
return self._filename
else:
return os.path.join(get_publisher().app_dir, self._filename)
else:
objects_dir = self.get_objects_dir()
return os.path.join(objects_dir, fix_key(self.id))
def storage_dumps(cls, object):
return pickle.dumps(object)
storage_dumps = classmethod(storage_dumps)
def store(self, async=False):
objects_dir = self.get_objects_dir()
new_object = False
if self._filename:
if self._filename[0] == '/':
object_filename = self._filename
relative_object_filename = None
else:
object_filename = os.path.join(get_publisher().app_dir, self._filename)
relative_object_filename = os.path.join('..', self._filename)
else:
if not os.path.exists(objects_dir):
try:
os.mkdir(objects_dir)
except OSError, error:
if error.errno != 17: # 17 == Directory exists
raise
if self.id is None:
self.id = self.get_new_id(create=True)
new_object = True
object_filename = os.path.join(objects_dir, fix_key(self.id))
relative_object_filename = os.path.join('..', self.get_table_name(), fix_key(self.id))
previous_object_value = None
if not new_object and (self._indexes or self._hashed_indexes):
previous_object_value = self.get_filename(object_filename,
ignore_errors=True, ignore_migration=True)
s = self.storage_dumps(self)
atomic_write(object_filename, s, async)
# update last modified time
if os.path.exists(objects_dir):
os.utime(objects_dir, None)
try:
self.update_indexes(previous_object_value, relative_object_filename)
except:
# something failed, we can't keep using possibly broken indexes, so
# we notify of the bug and remove the indexes
get_publisher().notify_of_exception(sys.exc_info(), context='[STORAGE]')
self.destroy_indexes()
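    # Usage sketch (hypothetical subclass, for illustration), assuming a
    # publisher and its app_dir are set up:
    #   class Song(StorableObject):
    #       _names = 'songs'
    #       _hashed_indexes = ['artist']
    #   song = Song()
    #   song.artist = 'someone'
    #   song.store()         # allocates an id, writes the pickle atomically,
    #                        # then refreshes the indexes
    #   same = Song.get(song.id)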
    def destroy_indexes(cls):
        # move the index directories aside first (a cheap, atomic rename) so
        # possibly broken indexes stop being used at once, then wipe them
        objects_dir = cls.get_objects_dir()
directories_to_trash = []
directories_to_wipe = []
for index in cls._indexes or []:
index_dir = objects_dir + '-' + index
directories_to_trash.append(index_dir)
directories_to_trash.append(os.path.join(objects_dir, '.indexes'))
for directory in directories_to_trash:
if not os.path.exists(directory):
continue
i = 0
while True:
trashed_index_name = directory + '.trash-%s' % i
i += 1
try:
os.mkdir(trashed_index_name)
except OSError:
continue
try:
os.rename(directory, os.path.join(trashed_index_name, 'idx'))
except OSError:
continue
directories_to_wipe.append(trashed_index_name)
break
for directory in directories_to_wipe:
shutil.rmtree(directory)
destroy_indexes = classmethod(destroy_indexes)
def update_indexes(self, previous_object_value, relative_object_filename):
objects_dir = self.get_objects_dir()
rebuilt_indexes = False
for index in self._indexes or []:
if not hasattr(self, index) or getattr(self, index) is None:
continue
index_dir = objects_dir + '-' + index
link_name = os.path.join(index_dir, fix_key(str(getattr(self, index))))
if previous_object_value:
old_link_name = os.path.join(index_dir,
fix_key(str(getattr(previous_object_value, index))))
if os.path.exists(old_link_name):
if old_link_name == link_name:
continue
os.unlink(old_link_name)
try:
if relative_object_filename:
os.symlink(relative_object_filename, link_name)
else:
os.symlink(self.get_object_filename(), link_name)
        except OSError, exc:
            if exc.errno == 2:  # ENOENT: the index dir is missing
                os.mkdir(index_dir)
                if not rebuilt_indexes:
                    # perhaps index dir got removed; rebuild it before
                    # adding elements to it.
                    self.rebuild_indexes()
                    rebuilt_indexes = True
            elif exc.errno == 17:  # EEXIST: a stale link exists
                os.unlink(link_name)
            else:
                raise
if not rebuilt_indexes:
if relative_object_filename:
os.symlink(relative_object_filename, link_name)
else:
os.symlink(self.get_object_filename(), link_name)
for index in self._hashed_indexes or []:
index_dir = os.path.join(objects_dir, '.indexes', index)
if not os.path.exists(index_dir):
os.makedirs(index_dir)
old_value = []
if type(getattr(self, index)) is dict:
new_value = getattr(self, index).values()
if previous_object_value:
old_value = getattr(previous_object_value, index)
if old_value is None:
old_value = []
else:
old_value = old_value.values()
elif type(getattr(self, index)) in (tuple, list, set):
new_value = getattr(self, index)
if previous_object_value:
old_value = getattr(previous_object_value, index)
else:
new_value = [getattr(self, index)]
if previous_object_value:
old_raw_value = getattr(previous_object_value, index)
if type(old_raw_value) is dict:
old_value = old_raw_value.values()
elif type(old_raw_value) in (tuple, list, set):
old_value = old_raw_value
else:
old_value = [old_raw_value]
for oldv in old_value:
if oldv in new_value:
continue
old_index_name = '%s-%s' % (index, fix_key(oldv))
old_index_file = os.path.join(index_dir, old_index_name)
if os.path.exists(old_index_file):
ids = pickle.load(file(old_index_file))
if self.id in ids:
ids.remove(self.id)
pickle.dump(ids, file(old_index_file, 'w'))
for newv in new_value:
if newv in old_value:
continue
index_name = '%s-%s' % (index, fix_key(newv))
index_file = os.path.join(index_dir, index_name)
if os.path.exists(index_file):
ids = pickle.load(file(index_file))
else:
ids = []
                if self.id not in ids:
                    ids.append(self.id)
                pickle.dump(ids, file(index_file, 'w'))
    def volatile(cls):
        # return an instance that is not attached to storage (no id)
        o = cls()
        o.id = None
        return o
volatile = classmethod(volatile)
def remove_object(cls, id):
objects_dir = cls.get_objects_dir()
if cls._indexes or cls._hashed_indexes:
object = cls.get(id)
for index in cls._indexes or []:
if not hasattr(object, index) or getattr(object, index) is None:
continue
index_dir = objects_dir + '-' + index
link_name = os.path.join(index_dir, fix_key(str(getattr(object, index))))
try:
os.unlink(link_name)
except OSError:
pass
index_dir = os.path.join(objects_dir, '.indexes')
for index in cls._hashed_indexes or []:
                attribute = getattr(object, index, None)
                if attribute is None:
                    continue
                if type(attribute) is dict:
                    attribute = attribute.values()
                elif type(attribute) not in (tuple, list, set):
                    attribute = [attribute]
for attr in attribute:
attr_value = fix_key(attr)
index_name = '%s-%s' % (index, attr_value)
index_file = os.path.join(index_dir, index, index_name)
if os.path.exists(index_file):
ids = pickle.load(file(index_file))
if object.id in ids:
ids.remove(object.id)
pickle.dump(ids, file(index_file, 'w'))
os.unlink(os.path.join(objects_dir, fix_key(id)))
remove_object = classmethod(remove_object)
def remove_self(self):
self.remove_object(self.id)
def wipe(cls):
tmpdir = tempfile.mkdtemp(prefix='wiping', dir=os.path.join(get_publisher().app_dir))
dirs_to_move = []
objects_dir = cls.get_objects_dir()
dirs_to_move.append(objects_dir)
for index in cls._indexes or []:
index_dir = objects_dir + '-' + index
dirs_to_move.append(index_dir)
for directory in dirs_to_move:
if os.path.exists(directory):
os.rename(directory, os.path.join(tmpdir, os.path.basename(directory)))
shutil.rmtree(tmpdir)
wipe = classmethod(wipe)
def __repr__(self):
if hasattr(self, 'display_name'):
display_name = '%r ' % self.display_name
elif hasattr(self, 'get_display_name'):
display_name = '%r ' % self.get_display_name()
elif hasattr(self, 'name'):
display_name = '%r ' % self.name
else:
display_name = ''
return '<%s %sid:%s>' % (self.__class__.__name__, display_name, self.id)