wcs/wcs/qommon/http_request.py

239 lines
8.3 KiB
Python

# w.c.s. - web application for online forms
# Copyright (C) 2005-2010 Entr'ouvert
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
import base64
import copy
import re
import time
import quixote.http_request
from django.utils.encoding import force_bytes, force_text
from quixote import get_publisher, get_session
from quixote.errors import RequestError
from .http_response import HTTPResponse
class HTTPRequest(quixote.http_request.HTTPRequest):
    '''Quixote HTTP request extended with w.c.s. specific helpers.

    On top of the base class it provides lazy user resolution (HTTP Basic
    credentials or session), JSON payload parsing, per-request caches and a
    few helpers to classify the request (backoffice, API, bot, ...).
    '''

    signed = False
    # set to True once process_inputs() has run, so parsing happens only once
    parsed = False
    # when set, process_inputs() reads the request body from this object
    django_request = None

    # use empty tuple instead of None as None is a "valid" user value
    _user = ()

    # characters that are not valid XML; compiled once for all requests
    _illegal_xml_chars = re.compile(r'[\x00-\x08\x0b\x0c\x0e-\x1f\ud800-\udfff\ufffe\uffff]')

    def __init__(self, *args, **kwargs):
        '''Initialise the request; arguments are passed as is to the base class.'''
        quixote.http_request.HTTPRequest.__init__(self, *args, **kwargs)
        self.response = HTTPResponse()
        self.charset = get_publisher().site_charset
        self.is_json_marker = None
        self.ignore_session = False
        self.wscalls_cache = {}
        self.datasources_cache = {}
        # keep a copy of environment to make sure it's not reused along
        # uwsgi/gunicorn processes.
        self.environ = copy.copy(self.environ)

    def get_user(self):
        '''Return the user making the request, or None.

        The value is resolved on first access and cached in ``self._user``.
        When an ``Authorization: Basic`` header is present, credentials are
        checked against PasswordAccount, then ApiAccess; the session is not
        consulted in that case.  Otherwise the user comes from the session.
        '''
        if self._user != ():
            return self._user

        auth_header = self.get_header('Authorization', '')
        if auth_header.startswith('Basic '):
            encoded_credentials = auth_header.split(' ', 1)[1]
            try:
                decoded = force_text(base64.decodebytes(force_bytes(encoded_credentials)))
                username, password = decoded.split(':', 1)
            except (UnicodeDecodeError, ValueError):
                # ValueError will catch both missing ":" (not enough values to
                # unpack (expected 2, got 1)) and binascii.Error (incorrect
                # padding or invalid base64-encoded string).
                self._user = None
                return None

            from wcs.api_access import ApiAccess

            from .ident.password_accounts import PasswordAccount

            try:
                self._user = PasswordAccount.get_with_credentials(username, password)
            except KeyError:
                try:
                    self._user = ApiAccess.get_with_credentials(username, password)
                except KeyError:
                    self._user = None

            # return the resolved value so the first access gives the same
            # result as the following (cached) ones.
            return self._user

        try:
            self._user = get_session().get_user()
        except AttributeError:
            self._user = None
        return self._user

    user = property(get_user)

    def get_server(self, clean=True):
        '''Return the server name as given by the base class.

        When ``clean`` is true, a leading ``iframe.`` or ``iframe-`` prefix
        (case-insensitive) is removed.
        '''
        server_name = quixote.http_request.HTTPRequest.get_server(self)
        if clean and server_name.lower()[:7] in ('iframe.', 'iframe-'):
            server_name = server_name[7:]
        return server_name

    def get_local_url(self, n=0):
        '''Return the local part of the URL, query string included'''
        path = self.get_path(n)
        query = self.get_query()
        if query:
            return path + '?' + query
        return path

    def get_frontoffice_url(self, n=0):
        '''Return the local URL prefixed with the publisher frontoffice URL.'''
        return get_publisher().get_frontoffice_url(without_script_name=True) + self.get_local_url(n)

    def get_substitution_variables(self):
        '''Return the variables exposed by the request.

        The result has a ``request`` entry (a LazyRequest wrapping this
        object) and an ``env_*`` entry for every APPNAME_* environment
        variable (names are compared and exported in lowercase).
        '''
        # environment variables APPNAME_* are exported to env_*
        from wcs.variables import LazyRequest

        prefix = get_publisher().APP_NAME.lower() + '_'
        variables = {'request': LazyRequest(self)}
        for k, v in self.environ.items():
            lowered = k.lower()
            if lowered.startswith(prefix):
                variables['env_' + lowered[len(prefix) :]] = v
        return variables

    def dump(self):
        '''Return a multi-line text dump of form, cookies and environment.'''
        # straight copy of HTTPRequest.dump(), sole modification is that the
        # values are printed as %r, not %s
        result = []
        row = '%-15s %r'

        if self.form:
            result.append("Form:")
            result.extend(row % (k, v) for k, v in sorted(self.form.items()))

        result.append("")
        result.append("Cookies:")
        result.extend(row % (k, v) for k, v in sorted(self.cookies.items()))

        result.append("")
        result.append("Environment:")
        result.extend(row % (k, v) for k, v in sorted(self.environ.items()))
        return "\n".join(result)

    def process_inputs(self):
        '''Parse query string and request body, once per request.

        Query string parameters are merged into ``self.form``; the body is
        handled according to its content type (urlencoded, multipart or
        JSON, the latter being stored for the ``json`` property).  String
        values of ``self.form`` are then stripped of characters that are not
        valid XML.

        Raises RequestError on an invalid Content-Length header or an
        invalid JSON payload.
        '''
        if self.parsed:
            return

        query = self.get_query()
        if query:
            self.form.update(quixote.http_request.parse_query(query, self.charset))

        length = self.environ.get('CONTENT_LENGTH') or '0'
        try:
            length = int(length)
        except ValueError:
            raise RequestError('invalid content-length header')

        ctype = self.environ.get("CONTENT_TYPE")
        if self.django_request:
            self.stdin = self.django_request
        if ctype:
            ctype, ctype_params = quixote.http_request.parse_header(ctype)
            if ctype == 'application/x-www-form-urlencoded':
                self._process_urlencoded(length, ctype_params)
            elif ctype == 'multipart/form-data':
                self._process_multipart(length, ctype_params)
            elif ctype == 'application/json' and self.stdin:
                from .misc import json_loads

                # length has already been validated and converted above
                if length:
                    payload = self.stdin.read(length)
                    try:
                        self._json = json_loads(payload)
                    except ValueError as e:
                        raise RequestError('invalid json payload (%s)' % str(e))
                else:
                    # consider empty post as an empty dictionary
                    self._json = {}

        # remove characters that are not valid XML so it doesn't have to happen
        # down the chain.
        self.form = {
            k: self._illegal_xml_chars.sub('', v) if isinstance(v, str) else v
            for k, v in self.form.items()
        }
        self.parsed = True

    @property
    def json(self):
        '''Parsed JSON payload, as stored by process_inputs().

        Raises RequestError if no JSON payload was stored.
        '''
        if not hasattr(self, '_json'):
            raise RequestError('expected JSON but missing appropriate content-type')
        return self._json

    def is_json(self):
        '''Tell whether the request is a JSON request.

        This is the case when ``is_json_marker`` is set, when the
        Content-Type or Accept header is exactly ``application/json``, when
        the query string is ``json`` or when the ``format`` form parameter
        is ``json``.
        '''
        if self.is_json_marker:
            return True
        if self.get_header('Content-Type', '').strip() == 'application/json':
            return True
        if self.get_header('Accept', '').strip() == 'application/json':
            return True
        if self.get_query() == 'json':
            return True
        if self.form and self.form.get('format') == 'json':
            return True
        return False

    def is_in_backoffice(self):
        '''Tell whether the request path is under /backoffice/.'''
        return self.get_path().startswith('/backoffice/')

    def is_api_url(self):
        '''Tell whether the request path is under /api/.'''
        return self.get_path().startswith('/api/')

    def is_in_frontoffice(self):
        '''Tell whether the request is neither for the backoffice nor for the API.'''
        return not (self.is_in_backoffice() or self.is_api_url())

    def is_from_bot(self):
        '''Tell whether the request comes from a bot, according to BotFilter.'''
        from .logger import BotFilter

        return BotFilter.is_bot(request=self)

    def is_from_application(self):
        '''Tell whether the user agent is python-requests, curl or Wget.'''
        # detect calls made from other applications or debug tools
        # this is not to detect bots (is_from_bot above)
        user_agent = self.get_environ('HTTP_USER_AGENT', '')
        return user_agent.startswith(('python-requests', 'curl', 'Wget'))

    def has_anonymised_data_api_restriction(self):
        '''Tell whether the request is restricted to anonymised data.

        This is the case when an ``anonymise`` parameter is present, or when
        the ApiAccess matching the ``orig`` parameter has
        ``restrict_to_anonymised_data`` set.
        '''
        from wcs.api_access import ApiAccess

        if 'anonymise' in self.form:
            return True

        orig = self.form.get('orig')
        if orig:
            api_access = ApiAccess.get_by_identifier(orig)
            if api_access:
                return api_access.restrict_to_anonymised_data
        return False

    @property
    def META(self):
        '''Request environment, under the ``META`` name.'''
        return self.environ

    def trace(self, msg):
        '''Print ``msg`` prefixed with the time elapsed since ``self.t0``.'''
        # NOTE(review): t0 is not initialised in this class; it has to be set
        # on the request before calling trace().
        print('%.4f' % (time.time() - self.t0), msg)