# wcs/wcs/qommon/http_request.py

# w.c.s. - web application for online forms
# Copyright (C) 2005-2010  Entr'ouvert
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

import base64
import copy
import re
import time

import quixote.http_request
from django.utils.encoding import force_bytes, force_text
from quixote import get_publisher, get_session
from quixote.errors import RequestError

from .http_response import HTTPResponse


class HTTPRequest(quixote.http_request.HTTPRequest):
    '''Quixote HTTP request extended with application specific helpers.

    On top of the parent class it resolves the user attached to the request
    (HTTP Basic credentials or session), parses JSON payloads, and offers a
    few predicates about the requested URL and the calling client.
    '''

    signed = False
    # set to True by process_inputs() so inputs are only parsed once
    parsed = False
    # when set, it replaces self.stdin as input stream in process_inputs()
    django_request = None

    # use empty tuple instead of None as None is a "valid" user value
    _user = ()

    # characters that are not valid XML; compiled once at class creation
    # instead of on every process_inputs() call.
    _illegal_xml_chars = re.compile(r'[\x00-\x08\x0b\x0c\x0e-\x1f\ud800-\udfff\ufffe\uffff]')

    def __init__(self, *args, **kwargs):
        '''Initialise the request; arguments are passed to the parent class.'''
        quixote.http_request.HTTPRequest.__init__(self, *args, **kwargs)
        self.response = HTTPResponse()
        self.charset = get_publisher().site_charset
        self.is_json_marker = None
        self.ignore_session = False
        self.wscalls_cache = {}
        self.datasources_cache = {}
        # keep a copy of environment to make sure it's not reused along
        # uwsgi/gunicorn processes.
        self.environ = copy.copy(self.environ)

    def get_user(self):
        '''Return the user attached to the request, or None.

        The result is cached in self._user.  When the request has an HTTP
        Basic "Authorization" header the credentials are checked against
        PasswordAccount and then ApiAccess; otherwise the user is taken from
        the session.  The same value is returned on the first call and on
        later (cached) calls.
        '''
        if self._user != ():
            return self._user

        auth_header = self.get_header('Authorization', '')
        if auth_header.startswith('Basic '):
            auth_header = auth_header.split(' ', 1)[1]
            try:
                username, password = force_text(base64.decodebytes(force_bytes(auth_header))).split(':', 1)
            except (UnicodeDecodeError, ValueError):
                # ValueError will catch both missing ":" (not enough values to
                # unpack (expected 2, got 1)) and binascii.Error (incorrect
                # padding or invalid base64-encoded string).
                self._user = None
                return self._user

            from wcs.api_access import ApiAccess

            from .ident.password_accounts import PasswordAccount

            try:
                self._user = PasswordAccount.get_with_credentials(username, password)
            except KeyError:
                try:
                    self._user = ApiAccess.get_with_credentials(username, password)
                except KeyError:
                    self._user = None

            # return the resolved user, not a bare None, so the first access
            # agrees with the cached value returned by subsequent accesses.
            return self._user

        try:
            self._user = get_session().get_user()
        except AttributeError:
            self._user = None
        return self._user

    user = property(get_user)

    def get_server(self, clean=True):
        '''Return the server name given by the parent class.

        When clean is true, a leading "iframe." or "iframe-" prefix (compared
        case-insensitively) is removed.
        '''
        server_name = quixote.http_request.HTTPRequest.get_server(self)
        if clean and server_name.lower()[:7] in ('iframe.', 'iframe-'):
            server_name = server_name[7:]
        return server_name

    def get_local_url(self, n=0):
        '''Return the local part of the URL, query string included'''
        query = self.get_query()
        if query:
            return self.get_path(n) + '?' + query
        return self.get_path(n)

    def get_frontoffice_url(self, n=0):
        '''Return the publisher frontoffice URL followed by the local URL.'''
        return get_publisher().get_frontoffice_url(without_script_name=True) + self.get_local_url(n)

    def get_substitution_variables(self):
        '''Return a dictionary with a lazy "request" object and env_* entries.'''
        # environment variables APPNAME_* are exported to env_*
        from wcs.variables import LazyRequest

        prefix = get_publisher().APP_NAME.lower() + '_'
        variables = {'request': LazyRequest(self)}
        for k, v in self.environ.items():
            lowered = k.lower()
            if lowered.startswith(prefix):
                variables['env_' + lowered[len(prefix) :]] = v
        return variables

    def dump(self):
        '''Return a multi-line string listing form, cookies and environment.'''
        # straight copy of HTTPRequest.dump(), sole modification is that the
        # values are printed as %r, not %s
        result = []
        row = '%-15s %r'

        if self.form:
            result.append("Form:")
            for k, v in sorted(self.form.items()):
                result.append(row % (k, v))

        result.append("")
        result.append("Cookies:")
        for k, v in sorted(self.cookies.items()):
            result.append(row % (k, v))

        result.append("")
        result.append("Environment:")
        for k, v in sorted(self.environ.items()):
            result.append(row % (k, v))
        return "\n".join(result)

    def process_inputs(self):
        '''Parse query string and request body; does nothing if already done.

        Query string and urlencoded/multipart bodies are stored in self.form;
        an "application/json" body is decoded and made available through the
        json property.  Characters that are not valid XML are removed from
        string values of self.form.

        Raises RequestError on an invalid Content-Length header or an invalid
        JSON payload.
        '''
        if self.parsed:
            return
        query = self.get_query()
        if query:
            self.form.update(quixote.http_request.parse_query(query, self.charset))
        length = self.environ.get('CONTENT_LENGTH') or '0'
        try:
            length = int(length)
        except ValueError:
            raise RequestError('invalid content-length header')
        ctype = self.environ.get("CONTENT_TYPE")
        if self.django_request:
            self.stdin = self.django_request
        if ctype:
            # ctype_params is only bound here; it is only read in branches
            # that require a non-empty content type.
            ctype, ctype_params = quixote.http_request.parse_header(ctype)
        if ctype == 'application/x-www-form-urlencoded':
            self._process_urlencoded(length, ctype_params)
        elif ctype == 'multipart/form-data':
            self._process_multipart(length, ctype_params)
        elif ctype == 'application/json' and self.stdin:
            from .misc import json_loads

            # length has already been validated and converted to int above
            if length:
                payload = self.stdin.read(length)
                try:
                    self._json = json_loads(payload)
                except ValueError as e:
                    raise RequestError('invalid json payload (%s)' % str(e))
            else:
                # consider empty post as an empty dictionary
                self._json = {}
        # remove characters that are not valid XML so it doesn't have to happen
        # down the chain.
        self.form = {
            k: self._illegal_xml_chars.sub('', v) if isinstance(v, str) else v for k, v in self.form.items()
        }
        self.parsed = True

    @property
    def json(self):
        '''Decoded JSON payload; raises RequestError if none was parsed.'''
        if not hasattr(self, '_json'):
            raise RequestError('expected JSON but missing appropriate content-type')
        return self._json

    def is_json(self):
        '''Return True if the request is marked as, or asks for, JSON.'''
        # NOTE(review): the Content-Type header is compared as a whole, so a
        # value with parameters ("application/json; charset=utf-8") does not
        # match here while process_inputs() would parse it as JSON - confirm
        # whether this difference is intended.
        if self.is_json_marker:
            return True
        if self.get_header('Content-Type', '').strip() == 'application/json':
            return True
        if self.get_header('Accept', '').strip() == 'application/json':
            return True
        if self.get_query() == 'json':
            return True
        if self.form and self.form.get('format') == 'json':
            return True
        return False

    def is_in_backoffice(self):
        '''Return True if the request path starts with /backoffice/.'''
        return self.get_path().startswith('/backoffice/')

    def is_api_url(self):
        '''Return True if the request path starts with /api/.'''
        return self.get_path().startswith('/api/')

    def is_in_frontoffice(self):
        '''Return True if the request is neither for the backoffice nor the API.'''
        return not (self.is_in_backoffice() or self.is_api_url())

    def is_from_bot(self):
        '''Return the result of BotFilter.is_bot() for this request.'''
        from .logger import BotFilter

        return BotFilter.is_bot(request=self)

    def is_from_application(self):
        '''Return True if the user agent is python-requests, curl or Wget.'''
        # detect calls made from other applications or debug tools
        # this is not to detect bots (is_from_bot above)
        user_agent = self.get_environ('HTTP_USER_AGENT', '')
        return user_agent.startswith(('python-requests', 'curl', 'Wget'))

    def has_anonymised_data_api_restriction(self):
        '''Return True if the request must be limited to anonymised data.

        This is the case when "anonymise" is in the form, or when the "orig"
        form value identifies an ApiAccess object with
        restrict_to_anonymised_data set.
        '''
        from wcs.api_access import ApiAccess

        if 'anonymise' in self.form:
            return True
        orig = self.form.get('orig')
        if orig:
            api_access = ApiAccess.get_by_identifier(orig)
            if api_access:
                return api_access.restrict_to_anonymised_data
        return False

    @property
    def META(self):
        '''Alias for self.environ.'''
        # presumably named after the META attribute of Django requests so
        # code expecting one can read the environment - verify against callers.
        return self.environ

    def trace(self, msg):
        '''Print msg prefixed by the seconds elapsed since self.t0.'''
        # NOTE(review): t0 is not set anywhere in this class; it is expected
        # to be assigned on the request before trace() is called - confirm.
        print('%.4f' % (time.time() - self.t0), msg)