summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBenjamin Dauvergne <bdauvergne@entrouvert.com>2015-11-16 23:09:48 (GMT)
committerBenjamin Dauvergne <bdauvergne@entrouvert.com>2015-11-16 23:09:48 (GMT)
commit4c7909e023f9639d2e8b981df2183e0ba89be6e1 (patch)
tree519fd37321fd0d0e71da4a51dbd04d0f7f7b87ad
downloadwcs-elasticsearch-4c7909e023f9639d2e8b981df2183e0ba89be6e1.zip
wcs-elasticsearch-4c7909e023f9639d2e8b981df2183e0ba89be6e1.tar.gz
wcs-elasticsearch-4c7909e023f9639d2e8b981df2183e0ba89be6e1.tar.bz2
first commitHEADmaster
-rw-r--r--README1
-rwxr-xr-xsetup.py65
-rw-r--r--wcs_es/__init__.py0
-rw-r--r--wcs_es/cmd.py57
-rw-r--r--wcs_es/signature.py72
-rw-r--r--wcs_es/wcs_api.py96
6 files changed, 291 insertions, 0 deletions
diff --git a/README b/README
new file mode 100644
index 0000000..c92f55c
--- /dev/null
+++ b/README
@@ -0,0 +1 @@
+W.C.S. ElasticSearch integration
diff --git a/setup.py b/setup.py
new file mode 100755
index 0000000..52fafa2
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,65 @@
+#! /usr/bin/env python
+
+''' Setup script for wcs-es
+'''
+
+import os
+import subprocess
+from setuptools import setup, find_packages
+from distutils.command.sdist import sdist as _sdist
+
+
+class eo_sdist(_sdist):
+ def run(self):
+ if os.path.exists('VERSION'):
+ os.remove('VERSION')
+ version = get_version()
+ version_file = open('VERSION', 'w')
+ version_file.write(version)
+ version_file.close()
+ _sdist.run(self)
+ if os.path.exists('VERSION'):
+ os.remove('VERSION')
+
+
+def get_version():
+ '''Use the VERSION, if absent generates a version with git describe, if not
+ tag exists, take 0.0.0- and add the length of the commit log.
+ '''
+ if os.path.exists('VERSION'):
+ with open('VERSION', 'r') as v:
+ return v.read()
+ if os.path.exists('.git'):
+ p = subprocess.Popen(['git', 'describe', '--dirty', '--match=v*'], stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ result = p.communicate()[0]
+ if p.returncode == 0:
+ return result.split()[0][1:].replace('-', '.')
+ else:
+ return '0.0.0-%s' % len(subprocess.check_output(
+ ['git', 'rev-list', 'HEAD']).splitlines())
+ return '0.0.0'
+
+setup(name="wcs-es",
+ version=get_version(),
+ license="AGPLv3 or later",
+ description="W.C.S. ElasticSearch feeder",
+ long_description=file('README').read(),
+ url="http://dev.entrouvert.org/projects/wcs-es/",
+ author="Entr'ouvert",
+ author_email="info@entrouvert.org",
+ include_package_data=True,
+ packages=find_packages(),
+ install_requires=[
+ 'requests',
+ 'elasticsearch',
+ ],
+ setup_requires=[
+ ],
+ tests_require=[
+ 'pytest',
+ ],
+ dependency_links=[],
+ cmdclass={
+ 'sdist': eo_sdist,
+ })
diff --git a/wcs_es/__init__.py b/wcs_es/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/wcs_es/__init__.py
diff --git a/wcs_es/cmd.py b/wcs_es/cmd.py
new file mode 100644
index 0000000..34e3662
--- /dev/null
+++ b/wcs_es/cmd.py
@@ -0,0 +1,57 @@
+from . import wcs_api
+
+def provision(api, es, index_name, recreate=False):
+ if recreate and es.indices.exists(index_name):
+ es.indices.delete(index_name)
+ for formdef in api.get_formdefs():
+ es.create(index=index_name, doc_type='formdef', body=formdef.json(), id=formdef.slug)
+ try:
+ for data in formdef.datas:
+ es.create(index_name, doc_type='form-'+formdef.slug, body=data.json(),
+ id=data.display_id)
+ print formdef.slug, 'populated'
+ except wcs_api.WcsApiError:
+ print '!!!', formdef.slug, 'failed'
+
+if __name__ == '__main__':
+ import argparse
+ import ConfigParser
+ import os
+ import elasticsearch
+ import urlparse
+
+ config = ConfigParser.ConfigParser()
+ config_file = os.path.expanduser('~/.wcs_es.ini')
+ if os.path.exists(config_file):
+ config.read(config_file)
+ urls = [url for url in config.sections() if url.startswith('http://') or
+ url.startswith('https://')]
+ parser = argparse.ArgumentParser(description='Provision ES with W.C.S. data')
+ parser.add_argument('--url', help='url of the w.c.s. instance', required=not urls,
+ default=(urls or [None])[0])
+ args, rest = parser.parse_known_args()
+ defaults = {}
+ if getattr(args, 'url') and config.has_section(args.url):
+ defaults = dict(config.items(args.url))
+ parser.add_argument('--orig', help='origin of the request for signatures',
+ required='orig' not in defaults)
+ parser.add_argument('--key', help='HMAC key for signatures', required='key' not in defaults)
+ group = parser.add_mutually_exclusive_group(
+ required='email' not in defaults and 'name_id' not in defaults)
+ group.add_argument('--email', help='email for authentication')
+ group.add_argument('--name-id', help='NameID for authentication')
+ parser.add_argument('--es-host', help='ElasticSearch hostname', default='localhost')
+ parser.add_argument('--es-port', help='ElasticSearch hostname', type=int, default=9200)
+ parser.add_argument('--es-no-recreate', help='ElasticSearch hostname', dest='es_recreate',
+ default=True, action='store_false')
+ if defaults:
+ parser.set_defaults(**defaults)
+
+ args = parser.parse_args()
+ api = wcs_api.WcsApi(url=args.url, orig=args.orig, key=args.key, email=args.email,
+ name_id=args.name_id)
+ index_name = urlparse.urlparse(args.url).netloc.split(':')[0].replace('.', '_')
+ es_hosts = [{'host': args.es_host, 'port': args.es_port, 'use_ssl': False}]
+ print es_hosts
+ es = elasticsearch.Elasticsearch(es_hosts)
+ provision(api, es, index_name, recreate=args.es_recreate)
diff --git a/wcs_es/signature.py b/wcs_es/signature.py
new file mode 100644
index 0000000..30124f9
--- /dev/null
+++ b/wcs_es/signature.py
@@ -0,0 +1,72 @@
+import datetime
+import base64
+import hmac
+import hashlib
+import urllib
+import random
+import urlparse
+
+'''Simple signature scheme for query strings'''
+
+
+def sign_url(url, key, algo='sha256', timestamp=None, nonce=None):
+ parsed = urlparse.urlparse(url)
+ new_query = sign_query(parsed.query, key, algo, timestamp, nonce)
+ return urlparse.urlunparse(parsed[:4] + (new_query,) + parsed[5:])
+
+
+def sign_query(query, key, algo='sha256', timestamp=None, nonce=None):
+ if timestamp is None:
+ timestamp = datetime.datetime.utcnow()
+ timestamp = timestamp.strftime('%Y-%m-%dT%H:%M:%SZ')
+ if nonce is None:
+ nonce = hex(random.SystemRandom().getrandbits(128))[2:-1]
+ new_query = query
+ if new_query:
+ new_query += '&'
+ new_query += urllib.urlencode((
+ ('algo', algo),
+ ('timestamp', timestamp),
+ ('nonce', nonce)))
+ signature = base64.b64encode(sign_string(new_query, key, algo=algo))
+ new_query += '&signature=' + urllib.quote(signature)
+ return new_query
+
+
+def sign_string(s, key, algo='sha256', timedelta=30):
+ digestmod = getattr(hashlib, algo)
+ if isinstance(key, unicode):
+ key = key.encode('utf-8')
+ hash = hmac.HMAC(key, digestmod=digestmod, msg=s)
+ return hash.digest()
+
+
+def check_url(url, key, known_nonce=None, timedelta=30):
+ parsed = urlparse.urlparse(url, 'https')
+ return check_query(parsed.query, key)
+
+
+def check_query(query, key, known_nonce=None, timedelta=30):
+ parsed = urlparse.parse_qs(query)
+ signature = base64.b64decode(parsed['signature'][0])
+ algo = parsed['algo'][0]
+ timestamp = parsed['timestamp'][0]
+ timestamp = datetime.datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%SZ')
+ nonce = parsed['nonce']
+ unsigned_query = query.split('&signature=')[0]
+ if known_nonce is not None and known_nonce(nonce):
+ return False
+ if abs(datetime.datetime.utcnow() - timestamp) > datetime.timedelta(seconds=timedelta):
+ return False
+ return check_string(unsigned_query, signature, key, algo=algo)
+
+
+def check_string(s, signature, key, algo='sha256'):
+ # constant time compare
+ signature2 = sign_string(s, key, algo=algo)
+ if len(signature2) != len(signature):
+ return False
+ res = 0
+ for a, b in zip(signature, signature2):
+ res |= ord(a) ^ ord(b)
+ return res == 0
diff --git a/wcs_es/wcs_api.py b/wcs_es/wcs_api.py
new file mode 100644
index 0000000..f14ba1e
--- /dev/null
+++ b/wcs_es/wcs_api.py
@@ -0,0 +1,96 @@
+import requests
+import urlparse
+import urllib
+
+from . import signature
+
+class WcsApiError(Exception):
+ pass
+
+
+class BaseObject(object):
+ def __init__(self, wcs_api, **kwargs):
+ self.__wcs_api = wcs_api
+ self.__dict__.update(**kwargs)
+
+ def json(self):
+ d = self.__dict__.copy()
+ for key in d.keys():
+ if key.startswith('_'):
+ del d[key]
+ return d
+
+
+class FormData(BaseObject):
+ def json(self):
+ d = super(FormData, self).json()
+ formdef = d.pop('formdef')
+ d['formdef_slug'] = formdef.slug
+ return d
+
+ def __repr__(self):
+ return '<{klass} {display_id!r}>'.format(klass=self.__class__.__name__,
+ display_id=self.id)
+
+class FormDef(BaseObject):
+ def __init__(self, wcs_api, **kwargs):
+ self.__wcs_api = wcs_api
+ self.__dict__.update(**kwargs)
+
+ def __unicode__(self):
+ return self.title
+
+ @property
+ def datas(self):
+ datas = self.__wcs_api.get_formdata(self.slug)
+ for data in datas:
+ data.formdef = self
+ return datas
+
+ def __repr__(self):
+ return '<{klass} {slug!r}>'.format(klass=self.__class__.__name__, slug=self.slug)
+
+
+class WcsApi(object):
+ def __init__(self, url, orig, key, name_id=None, email=None, verify=False):
+ self.url = url
+ self.orig = orig
+ self.key = key
+ self.email = email
+ self.name_id = name_id
+ self.verify = verify
+
+ @property
+ def formdefs_url(self):
+ return urlparse.urljoin(self.url, 'api/formdefs/')
+
+ @property
+ def forms_url(self):
+ return urlparse.urljoin(self.url, 'api/forms/')
+
+ def get_json(self, *url_parts):
+ url = reduce(lambda x, y: urlparse.urljoin(x, y), url_parts)
+ params = {'orig': self.orig}
+ if self.email:
+ params['email'] = self.email
+ if self.name_id:
+ params['NameID'] = self.name_id
+ query_string = urllib.urlencode(params)
+ presigned_url = url + ('&' if '?' in url else '?') + query_string
+ signed_url = signature.sign_url(presigned_url, self.key)
+ try:
+ response = requests.get(signed_url, verify=False)
+ except requests.RequestException, e:
+ raise WcsApiError('GET request failed', signed_url, e)
+ else:
+ try:
+ return response.json()
+ except ValueError, e:
+ raise WcsApiError('Invalid JSON content', signed_url, e)
+
+ def get_formdefs(self):
+ return [FormDef(wcs_api=self, **d) for d in self.get_json(self.formdefs_url)]
+
+ def get_formdata(self, slug):
+ return [FormData(wcs_api=self, **d) for d in self.get_json(self.forms_url, slug + '/',
+ 'list?full=on')]