# w.c.s. - web application for online forms
# Copyright (C) 2005-2010 Entr'ouvert
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

import base64
|
|
import copy
|
|
import re
|
|
import time
|
|
|
|
import quixote.http_request
|
|
from django.utils.encoding import force_bytes, force_text
|
|
from quixote import get_publisher, get_session
|
|
from quixote.errors import RequestError
|
|
|
|
from .http_response import HTTPResponse
|
|
|
|
|
|
class HTTPRequest(quixote.http_request.HTTPRequest):
    '''Quixote HTTP request extended with application specific helpers.

    On top of the quixote request it adds cached user resolution (HTTP
    Basic credentials or session), JSON payload parsing, a few URL
    classification helpers and per-request caches.
    '''

    # NOTE(review): not modified anywhere in this class; presumably set by
    # the code checking signed API calls -- confirm against callers.
    signed = False
    # set to True once process_inputs() has run, to avoid parsing twice.
    parsed = False
    # when set, it is used as the input stream (self.stdin) by
    # process_inputs().
    django_request = None

    def __init__(self, *args, **kwargs):
        '''Initialise the quixote request and the per-request state.

        All arguments are passed unchanged to the quixote ``HTTPRequest``
        constructor.
        '''
        quixote.http_request.HTTPRequest.__init__(self, *args, **kwargs)
        self.response = HTTPResponse()
        self.charset = get_publisher().site_charset
        self.is_json_marker = None
        self.ignore_session = False
        self.wscalls_cache = {}
        self.datasources_cache = {}
        # keep a copy of environment to make sure it's not reused along
        # uwsgi/gunicorn processes.
        self.environ = copy.copy(self.environ)

    _user = ()  # use empty tuple instead of None as None is a "valid" user value

    def get_user(self):
        '''Return the user attached to the request, or None.

        The value is computed on first call and cached in ``self._user``.
        When an ``Authorization: Basic`` header is present the credentials
        are checked against ``PasswordAccount`` then ``ApiAccess``; the
        session is not consulted in that case.  Otherwise the user comes
        from the current session.
        '''
        if self._user != ():
            return self._user

        auth_header = self.get_header('Authorization', '')
        if auth_header.startswith('Basic '):
            auth_header = auth_header.split(' ', 1)[1]
            try:
                username, password = force_text(base64.decodebytes(force_bytes(auth_header))).split(':', 1)
            except (UnicodeDecodeError, ValueError):
                # ValueError will catch both missing ":" (not enough values to
                # unpack (expected 2, got 1)) and binascii.Error (incorrect
                # padding or invalid base64-encoded string).
                self._user = None
                return self._user

            from wcs.api_access import ApiAccess

            from .ident.password_accounts import PasswordAccount

            try:
                self._user = PasswordAccount.get_with_credentials(username, password)
            except KeyError:
                try:
                    self._user = ApiAccess.get_with_credentials(username, password)
                except KeyError:
                    self._user = None

            # return the resolved value so the first call gives the same
            # result as the following (cached) ones.
            return self._user

        try:
            self._user = get_session().get_user()
        except AttributeError:
            # raised when there is no usable session object.
            self._user = None
        return self._user

    user = property(get_user)

    def get_server(self, clean=True):
        '''Return the server name as reported by quixote.

        If ``clean`` is true, a leading ``iframe.`` or ``iframe-`` prefix
        (matched case-insensitively) is removed.
        '''
        server_name = quixote.http_request.HTTPRequest.get_server(self)
        if clean and server_name.lower()[:7] in ('iframe.', 'iframe-'):
            server_name = server_name[7:]
        return server_name

    def get_local_url(self, n=0):
        '''Return the local part of the URL, query string included'''
        query = self.get_query()
        if query:
            return self.get_path(n) + '?' + query
        return self.get_path(n)

    def get_frontoffice_url(self, n=0):
        '''Return the local URL prefixed with the publisher frontoffice URL.'''
        return get_publisher().get_frontoffice_url(without_script_name=True) + self.get_local_url(n)

    def get_substitution_variables(self):
        '''Return the substitution variables provided by the request.

        The dictionary has a ``request`` entry (a ``LazyRequest`` wrapping
        this request) and an ``env_*`` entry for every environment key
        starting with the lowercased application name followed by ``_``.
        '''
        # environment variables APPNAME_* are exported to env_*
        from wcs.variables import LazyRequest

        prefix = get_publisher().APP_NAME.lower() + '_'
        variables = {'request': LazyRequest(self)}
        for key, value in self.environ.items():
            lowered_key = key.lower()
            if lowered_key.startswith(prefix):
                variables['env_' + lowered_key[len(prefix) :]] = value
        return variables

    def dump(self):
        '''Return a multi-line text listing form, cookies and environment.'''
        # straight copy of HTTPRequest.dump(), sole modification is that the
        # values are printed as %r, not %s
        result = []
        row = '%-15s %r'

        if self.form:
            result.append("Form:")
            for k, v in sorted(self.form.items()):
                result.append(row % (k, v))

        result.append("")
        result.append("Cookies:")
        for k, v in sorted(self.cookies.items()):
            result.append(row % (k, v))

        result.append("")
        result.append("Environment:")
        for k, v in sorted(self.environ.items()):
            result.append(row % (k, v))
        return "\n".join(result)

    def process_inputs(self):
        '''Parse the query string and the request body into the request.

        Query string and form encoded bodies populate ``self.form``; an
        ``application/json`` body is decoded into ``self._json`` (exposed
        by the ``json`` property).  Characters that are not valid in XML
        are then removed from string form values.  Does nothing if the
        request was already parsed.

        Raises RequestError if the content-length header is not an integer
        or if the JSON payload cannot be decoded.
        '''
        if self.parsed:
            return
        query = self.get_query()
        if query:
            self.form.update(quixote.http_request.parse_query(query, self.charset))
        length = self.environ.get('CONTENT_LENGTH') or '0'
        try:
            length = int(length)
        except ValueError as exc:
            raise RequestError('invalid content-length header') from exc
        ctype = self.environ.get("CONTENT_TYPE")
        if self.django_request:
            self.stdin = self.django_request
        if ctype:
            ctype, ctype_params = quixote.http_request.parse_header(ctype)
            if ctype == 'application/x-www-form-urlencoded':
                self._process_urlencoded(length, ctype_params)
            elif ctype == 'multipart/form-data':
                self._process_multipart(length, ctype_params)
            elif ctype == 'application/json' and self.stdin:
                from .misc import json_loads

                # length has already been validated and converted above.
                if length:
                    payload = self.stdin.read(length)
                    try:
                        self._json = json_loads(payload)
                    except ValueError as e:
                        raise RequestError('invalid json payload (%s)' % str(e)) from e
                else:
                    # consider empty post as an empty dictionary
                    self._json = {}
        # remove characters that are not valid XML so it doesn't have to happen
        # down the chain.
        illegal_xml_chars = re.compile(r'[\x00-\x08\x0b\x0c\x0e-\x1f\ud800-\udfff\ufffe\uffff]')
        self.form = {
            k: illegal_xml_chars.sub('', v) if isinstance(v, str) else v for k, v in self.form.items()
        }
        self.parsed = True

    @property
    def json(self):
        '''Decoded JSON payload of the request.

        Raises RequestError if no JSON payload was stored by
        process_inputs().
        '''
        if not hasattr(self, '_json'):
            raise RequestError('expected JSON but missing appropriate content-type')
        return self._json

    def is_json(self):
        '''Return True if the request is marked as JSON.

        This is the case if ``is_json_marker`` is set, if the Content-Type
        or Accept header is exactly ``application/json``, if the query
        string is ``json`` or if the form has ``format=json``.
        '''
        if self.is_json_marker:
            return True
        if self.get_header('Content-Type', '').strip() == 'application/json':
            return True
        if self.get_header('Accept', '').strip() == 'application/json':
            return True
        if self.get_query() == 'json':
            return True
        if self.form and self.form.get('format') == 'json':
            return True
        return False

    def is_in_backoffice(self):
        '''Return True if the request path starts with /backoffice/.'''
        return self.get_path().startswith('/backoffice/')

    def is_api_url(self):
        '''Return True if the request path starts with /api/.'''
        return self.get_path().startswith('/api/')

    def is_in_frontoffice(self):
        '''Return True if the request is neither a backoffice nor an API one.'''
        return not (self.is_in_backoffice() or self.is_api_url())

    def is_from_bot(self):
        '''Return the result of ``BotFilter.is_bot`` for this request.'''
        from .logger import BotFilter

        return BotFilter.is_bot(request=self)

    def is_from_application(self):
        '''Return True if the user agent is python-requests, curl or Wget.'''
        # detect calls made from other applications or debug tools
        # this is not to detect bots (is_from_bot above)
        user_agent = self.get_environ('HTTP_USER_AGENT', '')
        return user_agent.startswith(('python-requests', 'curl', 'Wget'))

    def has_anonymised_data_api_restriction(self):
        '''Tell if the request is restricted to anonymised data.

        True if the form has an ``anonymise`` key; otherwise the value of
        ``restrict_to_anonymised_data`` of the ``ApiAccess`` matching the
        ``orig`` form value, if there is one; False in all other cases.
        '''
        from wcs.api_access import ApiAccess

        if 'anonymise' in self.form:
            return True
        orig = self.form.get('orig')
        if orig:
            api_access = ApiAccess.get_by_identifier(orig)
            if api_access:
                return api_access.restrict_to_anonymised_data
        return False

    @property
    def META(self):
        '''Request environment, exposed under the name used by Django requests.'''
        return self.environ

    def trace(self, msg):
        '''Print ``msg`` prefixed with the time elapsed since ``self.t0``.'''
        # NOTE(review): t0 is not set in this class; it has to be assigned
        # on the request before trace() is called -- confirm where.
        print('%.4f' % (time.time() - self.t0), msg)
|