first commit
This commit is contained in:
commit
4c7909e023
|
@ -0,0 +1,65 @@
|
||||||
|
#! /usr/bin/env python
|
||||||
|
|
||||||
|
''' Setup script for wcs-es
|
||||||
|
'''
|
||||||
|
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
from setuptools import setup, find_packages
|
||||||
|
from distutils.command.sdist import sdist as _sdist
|
||||||
|
|
||||||
|
|
||||||
|
class eo_sdist(_sdist):
|
||||||
|
def run(self):
|
||||||
|
if os.path.exists('VERSION'):
|
||||||
|
os.remove('VERSION')
|
||||||
|
version = get_version()
|
||||||
|
version_file = open('VERSION', 'w')
|
||||||
|
version_file.write(version)
|
||||||
|
version_file.close()
|
||||||
|
_sdist.run(self)
|
||||||
|
if os.path.exists('VERSION'):
|
||||||
|
os.remove('VERSION')
|
||||||
|
|
||||||
|
|
||||||
|
def get_version():
|
||||||
|
'''Use the VERSION, if absent generates a version with git describe, if not
|
||||||
|
tag exists, take 0.0.0- and add the length of the commit log.
|
||||||
|
'''
|
||||||
|
if os.path.exists('VERSION'):
|
||||||
|
with open('VERSION', 'r') as v:
|
||||||
|
return v.read()
|
||||||
|
if os.path.exists('.git'):
|
||||||
|
p = subprocess.Popen(['git', 'describe', '--dirty', '--match=v*'], stdout=subprocess.PIPE,
|
||||||
|
stderr=subprocess.PIPE)
|
||||||
|
result = p.communicate()[0]
|
||||||
|
if p.returncode == 0:
|
||||||
|
return result.split()[0][1:].replace('-', '.')
|
||||||
|
else:
|
||||||
|
return '0.0.0-%s' % len(subprocess.check_output(
|
||||||
|
['git', 'rev-list', 'HEAD']).splitlines())
|
||||||
|
return '0.0.0'
|
||||||
|
|
||||||
|
setup(name="wcs-es",
|
||||||
|
version=get_version(),
|
||||||
|
license="AGPLv3 or later",
|
||||||
|
description="W.C.S. ElasticSearch feeder",
|
||||||
|
long_description=file('README').read(),
|
||||||
|
url="http://dev.entrouvert.org/projects/wcs-es/",
|
||||||
|
author="Entr'ouvert",
|
||||||
|
author_email="info@entrouvert.org",
|
||||||
|
include_package_data=True,
|
||||||
|
packages=find_packages(),
|
||||||
|
install_requires=[
|
||||||
|
'requests',
|
||||||
|
'elasticsearch',
|
||||||
|
],
|
||||||
|
setup_requires=[
|
||||||
|
],
|
||||||
|
tests_require=[
|
||||||
|
'pytest',
|
||||||
|
],
|
||||||
|
dependency_links=[],
|
||||||
|
cmdclass={
|
||||||
|
'sdist': eo_sdist,
|
||||||
|
})
|
|
@ -0,0 +1,57 @@
|
||||||
|
from . import wcs_api
|
||||||
|
|
||||||
|
def provision(api, es, index_name, recreate=False):
|
||||||
|
if recreate and es.indices.exists(index_name):
|
||||||
|
es.indices.delete(index_name)
|
||||||
|
for formdef in api.get_formdefs():
|
||||||
|
es.create(index=index_name, doc_type='formdef', body=formdef.json(), id=formdef.slug)
|
||||||
|
try:
|
||||||
|
for data in formdef.datas:
|
||||||
|
es.create(index_name, doc_type='form-'+formdef.slug, body=data.json(),
|
||||||
|
id=data.display_id)
|
||||||
|
print formdef.slug, 'populated'
|
||||||
|
except wcs_api.WcsApiError:
|
||||||
|
print '!!!', formdef.slug, 'failed'
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
import argparse
|
||||||
|
import ConfigParser
|
||||||
|
import os
|
||||||
|
import elasticsearch
|
||||||
|
import urlparse
|
||||||
|
|
||||||
|
config = ConfigParser.ConfigParser()
|
||||||
|
config_file = os.path.expanduser('~/.wcs_es.ini')
|
||||||
|
if os.path.exists(config_file):
|
||||||
|
config.read(config_file)
|
||||||
|
urls = [url for url in config.sections() if url.startswith('http://') or
|
||||||
|
url.startswith('https://')]
|
||||||
|
parser = argparse.ArgumentParser(description='Provision ES with W.C.S. data')
|
||||||
|
parser.add_argument('--url', help='url of the w.c.s. instance', required=not urls,
|
||||||
|
default=(urls or [None])[0])
|
||||||
|
args, rest = parser.parse_known_args()
|
||||||
|
defaults = {}
|
||||||
|
if getattr(args, 'url') and config.has_section(args.url):
|
||||||
|
defaults = dict(config.items(args.url))
|
||||||
|
parser.add_argument('--orig', help='origin of the request for signatures',
|
||||||
|
required='orig' not in defaults)
|
||||||
|
parser.add_argument('--key', help='HMAC key for signatures', required='key' not in defaults)
|
||||||
|
group = parser.add_mutually_exclusive_group(
|
||||||
|
required='email' not in defaults and 'name_id' not in defaults)
|
||||||
|
group.add_argument('--email', help='email for authentication')
|
||||||
|
group.add_argument('--name-id', help='NameID for authentication')
|
||||||
|
parser.add_argument('--es-host', help='ElasticSearch hostname', default='localhost')
|
||||||
|
parser.add_argument('--es-port', help='ElasticSearch hostname', type=int, default=9200)
|
||||||
|
parser.add_argument('--es-no-recreate', help='ElasticSearch hostname', dest='es_recreate',
|
||||||
|
default=True, action='store_false')
|
||||||
|
if defaults:
|
||||||
|
parser.set_defaults(**defaults)
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
api = wcs_api.WcsApi(url=args.url, orig=args.orig, key=args.key, email=args.email,
|
||||||
|
name_id=args.name_id)
|
||||||
|
index_name = urlparse.urlparse(args.url).netloc.split(':')[0].replace('.', '_')
|
||||||
|
es_hosts = [{'host': args.es_host, 'port': args.es_port, 'use_ssl': False}]
|
||||||
|
print es_hosts
|
||||||
|
es = elasticsearch.Elasticsearch(es_hosts)
|
||||||
|
provision(api, es, index_name, recreate=args.es_recreate)
|
|
@ -0,0 +1,72 @@
|
||||||
|
import datetime
|
||||||
|
import base64
|
||||||
|
import hmac
|
||||||
|
import hashlib
|
||||||
|
import urllib
|
||||||
|
import random
|
||||||
|
import urlparse
|
||||||
|
|
||||||
|
'''Simple signature scheme for query strings'''
|
||||||
|
|
||||||
|
|
||||||
|
def sign_url(url, key, algo='sha256', timestamp=None, nonce=None):
|
||||||
|
parsed = urlparse.urlparse(url)
|
||||||
|
new_query = sign_query(parsed.query, key, algo, timestamp, nonce)
|
||||||
|
return urlparse.urlunparse(parsed[:4] + (new_query,) + parsed[5:])
|
||||||
|
|
||||||
|
|
||||||
|
def sign_query(query, key, algo='sha256', timestamp=None, nonce=None):
|
||||||
|
if timestamp is None:
|
||||||
|
timestamp = datetime.datetime.utcnow()
|
||||||
|
timestamp = timestamp.strftime('%Y-%m-%dT%H:%M:%SZ')
|
||||||
|
if nonce is None:
|
||||||
|
nonce = hex(random.SystemRandom().getrandbits(128))[2:-1]
|
||||||
|
new_query = query
|
||||||
|
if new_query:
|
||||||
|
new_query += '&'
|
||||||
|
new_query += urllib.urlencode((
|
||||||
|
('algo', algo),
|
||||||
|
('timestamp', timestamp),
|
||||||
|
('nonce', nonce)))
|
||||||
|
signature = base64.b64encode(sign_string(new_query, key, algo=algo))
|
||||||
|
new_query += '&signature=' + urllib.quote(signature)
|
||||||
|
return new_query
|
||||||
|
|
||||||
|
|
||||||
|
def sign_string(s, key, algo='sha256', timedelta=30):
|
||||||
|
digestmod = getattr(hashlib, algo)
|
||||||
|
if isinstance(key, unicode):
|
||||||
|
key = key.encode('utf-8')
|
||||||
|
hash = hmac.HMAC(key, digestmod=digestmod, msg=s)
|
||||||
|
return hash.digest()
|
||||||
|
|
||||||
|
|
||||||
|
def check_url(url, key, known_nonce=None, timedelta=30):
|
||||||
|
parsed = urlparse.urlparse(url, 'https')
|
||||||
|
return check_query(parsed.query, key)
|
||||||
|
|
||||||
|
|
||||||
|
def check_query(query, key, known_nonce=None, timedelta=30):
|
||||||
|
parsed = urlparse.parse_qs(query)
|
||||||
|
signature = base64.b64decode(parsed['signature'][0])
|
||||||
|
algo = parsed['algo'][0]
|
||||||
|
timestamp = parsed['timestamp'][0]
|
||||||
|
timestamp = datetime.datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%SZ')
|
||||||
|
nonce = parsed['nonce']
|
||||||
|
unsigned_query = query.split('&signature=')[0]
|
||||||
|
if known_nonce is not None and known_nonce(nonce):
|
||||||
|
return False
|
||||||
|
if abs(datetime.datetime.utcnow() - timestamp) > datetime.timedelta(seconds=timedelta):
|
||||||
|
return False
|
||||||
|
return check_string(unsigned_query, signature, key, algo=algo)
|
||||||
|
|
||||||
|
|
||||||
|
def check_string(s, signature, key, algo='sha256'):
|
||||||
|
# constant time compare
|
||||||
|
signature2 = sign_string(s, key, algo=algo)
|
||||||
|
if len(signature2) != len(signature):
|
||||||
|
return False
|
||||||
|
res = 0
|
||||||
|
for a, b in zip(signature, signature2):
|
||||||
|
res |= ord(a) ^ ord(b)
|
||||||
|
return res == 0
|
|
@ -0,0 +1,96 @@
|
||||||
|
import requests
|
||||||
|
import urlparse
|
||||||
|
import urllib
|
||||||
|
|
||||||
|
from . import signature
|
||||||
|
|
||||||
|
class WcsApiError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class BaseObject(object):
|
||||||
|
def __init__(self, wcs_api, **kwargs):
|
||||||
|
self.__wcs_api = wcs_api
|
||||||
|
self.__dict__.update(**kwargs)
|
||||||
|
|
||||||
|
def json(self):
|
||||||
|
d = self.__dict__.copy()
|
||||||
|
for key in d.keys():
|
||||||
|
if key.startswith('_'):
|
||||||
|
del d[key]
|
||||||
|
return d
|
||||||
|
|
||||||
|
|
||||||
|
class FormData(BaseObject):
|
||||||
|
def json(self):
|
||||||
|
d = super(FormData, self).json()
|
||||||
|
formdef = d.pop('formdef')
|
||||||
|
d['formdef_slug'] = formdef.slug
|
||||||
|
return d
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return '<{klass} {display_id!r}>'.format(klass=self.__class__.__name__,
|
||||||
|
display_id=self.id)
|
||||||
|
|
||||||
|
class FormDef(BaseObject):
|
||||||
|
def __init__(self, wcs_api, **kwargs):
|
||||||
|
self.__wcs_api = wcs_api
|
||||||
|
self.__dict__.update(**kwargs)
|
||||||
|
|
||||||
|
def __unicode__(self):
|
||||||
|
return self.title
|
||||||
|
|
||||||
|
@property
|
||||||
|
def datas(self):
|
||||||
|
datas = self.__wcs_api.get_formdata(self.slug)
|
||||||
|
for data in datas:
|
||||||
|
data.formdef = self
|
||||||
|
return datas
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return '<{klass} {slug!r}>'.format(klass=self.__class__.__name__, slug=self.slug)
|
||||||
|
|
||||||
|
|
||||||
|
class WcsApi(object):
|
||||||
|
def __init__(self, url, orig, key, name_id=None, email=None, verify=False):
|
||||||
|
self.url = url
|
||||||
|
self.orig = orig
|
||||||
|
self.key = key
|
||||||
|
self.email = email
|
||||||
|
self.name_id = name_id
|
||||||
|
self.verify = verify
|
||||||
|
|
||||||
|
@property
|
||||||
|
def formdefs_url(self):
|
||||||
|
return urlparse.urljoin(self.url, 'api/formdefs/')
|
||||||
|
|
||||||
|
@property
|
||||||
|
def forms_url(self):
|
||||||
|
return urlparse.urljoin(self.url, 'api/forms/')
|
||||||
|
|
||||||
|
def get_json(self, *url_parts):
|
||||||
|
url = reduce(lambda x, y: urlparse.urljoin(x, y), url_parts)
|
||||||
|
params = {'orig': self.orig}
|
||||||
|
if self.email:
|
||||||
|
params['email'] = self.email
|
||||||
|
if self.name_id:
|
||||||
|
params['NameID'] = self.name_id
|
||||||
|
query_string = urllib.urlencode(params)
|
||||||
|
presigned_url = url + ('&' if '?' in url else '?') + query_string
|
||||||
|
signed_url = signature.sign_url(presigned_url, self.key)
|
||||||
|
try:
|
||||||
|
response = requests.get(signed_url, verify=False)
|
||||||
|
except requests.RequestException, e:
|
||||||
|
raise WcsApiError('GET request failed', signed_url, e)
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
return response.json()
|
||||||
|
except ValueError, e:
|
||||||
|
raise WcsApiError('Invalid JSON content', signed_url, e)
|
||||||
|
|
||||||
|
def get_formdefs(self):
|
||||||
|
return [FormDef(wcs_api=self, **d) for d in self.get_json(self.formdefs_url)]
|
||||||
|
|
||||||
|
def get_formdata(self, slug):
|
||||||
|
return [FormData(wcs_api=self, **d) for d in self.get_json(self.forms_url, slug + '/',
|
||||||
|
'list?full=on')]
|
Reference in New Issue