passerelle/tests/test_requests.py

748 lines
27 KiB
Python

import logging
from unittest import mock
import mohawk
import pytest
import requests
import responses
from django.test import override_settings
from httmock import HTTMock, response, urlmatch
from responses.registries import OrderedRegistry
from urllib3.exceptions import ConnectionError, ReadTimeoutError
from passerelle.utils import CaseInsensitiveDict, Request, log_http_request
from passerelle.utils.http_authenticators import HawkAuth
from tests.utils import FakedResponse
@pytest.fixture(autouse=True)
def clean_adapter_registry():
    """Empty ``Request.ADAPTER_REGISTRY`` before every test of this module.

    The fixture is ``autouse``, so tests never have to request it by name.
    NOTE(review): presumably the registry keeps transport adapters shared
    between ``Request`` instances -- confirm against ``passerelle.utils``.
    """
    registry = Request.ADAPTER_REGISTRY
    registry.clear()
class MockFileField:
    """Bare stand-in for a file field object: it only carries a ``path``."""

    def __init__(self, path):
        # The given path is stored untouched and exposed as ``.path``.
        self.path = path
class MockResource:
    """Minimal resource double handed to ``Request(resource=...)`` in the tests.

    Every attribute starts out "unset"; individual tests override them on an
    instance to exercise a specific behaviour.
    """

    # Logger shared with the tests (they filter records on the 'requests' name).
    logger = logging.getLogger('requests')

    # Proxy and HTTP basic authentication: empty strings by default.
    http_proxy = ''
    basic_auth_username = ''
    basic_auth_password = ''

    # TLS related attributes: no certificates, verification enabled.
    verify_cert = True
    client_certificate = None
    trusted_certificate_authorities = None

    @classmethod
    def get_setting(cls, name):
        """Return ``None`` for any setting *name*: the mock defines none."""
        return None
@pytest.fixture(params=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'])
def log_level(request):
    """Parametrized fixture: yields each listed logging level name in turn."""
    level_name = request.param
    return level_name
@urlmatch(netloc=r'(.*\.)?httpbin\.org$')
def httpbin_mock(url, request):
    """HTTMock handler for ``httpbin.org`` (and subdomains): always a JSON 200."""
    payload = {'message': 'Are you really josh ?'}
    json_headers = {'Content-Type': 'application/json'}
    return response(200, payload, headers=json_headers, request=request)
@urlmatch(netloc=r'(.*\.)?httperror\.org$')
def http400_mock(url, request):
    """HTTMock handler for ``httperror.org`` (and subdomains): always a JSON 400."""
    payload = {'foo': 'bar'}
    json_headers = {'Content-Type': 'application/json'}
    return response(400, payload, headers=json_headers, request=request)
def test_log_level(caplog, log_level):
    """A successful request yields one log record at INFO or below, none above.

    Payload, response content and response headers are attached to the
    record only when the logger is at DEBUG.
    """
    url = 'https://httpbin.org/post'
    logger = logging.getLogger('requests')
    logger.setLevel(log_level)

    with HTTMock(httpbin_mock):
        session = Request(logger=logger)
        resp = session.post(url, json={'name': 'josh'})

    records = [rec for rec in caplog.records if rec.name == 'requests']
    records_length = len(records)

    # Above INFO nothing must have been emitted for a successful request.
    if logger.level > logging.INFO:
        assert records_length == 0
        return

    assert records_length == 1
    record = records[0]
    assert record.request_url == url
    assert record.response_status == resp.status_code
    assert record.request_duration

    # Outside DEBUG the detailed attributes must not exist on the record.
    if logger.level != logging.DEBUG:
        for attribute in ('request_payload', 'response_content', 'response_headers'):
            assert not hasattr(record, attribute)
        return

    assert record.request_payload == b'{"name": "josh"}'
    assert record.response_content == resp.content
    assert record.response_headers
def test_log_error(caplog, log_level):
    """A 400 response yields one log record at ERROR or below, none at CRITICAL.

    Payload, response content and response headers are attached to the
    record only when the logger is at DEBUG.
    """
    url = 'https://httperror.org/plop'
    logger = logging.getLogger('requests')
    logger.setLevel(log_level)

    with HTTMock(http400_mock):
        session = Request(logger=logger)
        resp = session.post(url, json={'name': 'josh'})

    records = [rec for rec in caplog.records if rec.name == 'requests']
    records_length = len(records)

    # Above ERROR the failed request must not have been logged at all.
    if logger.level > logging.ERROR:
        assert records_length == 0
        return

    assert records_length == 1
    record = records[0]
    assert record.request_url == url
    assert record.response_status == resp.status_code
    assert record.request_duration

    # Outside DEBUG the detailed attributes must not exist on the record.
    if logger.level != logging.DEBUG:
        for attribute in ('request_payload', 'response_content', 'response_headers'):
            assert not hasattr(record, attribute)
        return

    assert record.request_payload == b'{"name": "josh"}'
    assert record.response_content == resp.content
    assert record.response_headers
def test_log_error_http_max_sizes(caplog, log_level, settings):
    """Logged request/response bodies are cut to the configured maximum sizes."""
    url = 'https://httperror.org/plop'
    logger = logging.getLogger('requests')
    logger.setLevel(log_level)

    # Values expected from the test settings before they are overridden.
    assert settings.LOGGED_REQUESTS_MAX_SIZE == 4999
    assert settings.LOGGED_RESPONSES_MAX_SIZE == 5000
    settings.LOGGED_REQUESTS_MAX_SIZE = 8
    settings.LOGGED_RESPONSES_MAX_SIZE = 7

    with HTTMock(http400_mock):
        session = Request(logger=logger)
        session.post(url, json={'name': 'josh'})

    # Bodies are only checked at DEBUG level.
    if logger.level != logging.DEBUG:
        return

    records = [rec for rec in caplog.records if rec.name == 'requests']
    first = records[0]
    assert first.request_payload == b'{"name":'  # 8 bytes kept
    assert first.response_content == b'{"foo":'  # 7 bytes kept
@pytest.fixture(params=['xml', 'whatever', 'jpeg', 'pdf'])
def endpoint_response(request):
response_request = mock.Mock(
headers={'Accept': '*/*', 'Authorization': 'Basic dG9rZW46dG9rZW4='}, body=None
)
responses = {
'xml': FakedResponse(
headers={'Content-Type': 'application/xml; charset=charset=utf-8'},
status_code=200,
content='<tests><test>xml test</test></tests>',
request=response_request,
),
'whatever': FakedResponse(
headers={'Content-Type': 'texto/csv'},
status_code=200,
content='username;age\ntoken;10\ncartman:10',
request=response_request,
),
'jpeg': FakedResponse(
headers={'Content-Type': 'image/jpeg'},
status_code=200,
content='binary content to be ignored',
request=response_request,
),
'pdf': FakedResponse(
headers={'Content-Type': 'application/pdf'},
status_code=200,
content='binary content to be ignored',
request=response_request,
),
}
return responses.get(request.param)
@mock.patch('passerelle.utils.RequestSession.send')
def test_skip_content_type(mocked_get, caplog, endpoint_response):
mocked_get.return_value = endpoint_response
logger = logging.getLogger('requests')
logger.setLevel(logging.DEBUG)
requests = Request(logger=logger)
requests.get('http://example.net/whatever')
records = [record for record in caplog.records if record.name == 'requests']
if 'xml' in endpoint_response.headers.get('Content-Type'):
assert len(records) == 1
assert records[0].response_content == '<tests><test>xml test</test></tests>'
else:
assert len(records) == 1
@mock.patch('passerelle.utils.RequestSession.request')
def test_proxies(mocked_get, caplog, endpoint_response):
    """Check which ``proxies`` value reaches the underlying session request.

    As asserted below: an explicit ``proxies=`` argument always wins, then the
    resource ``http_proxy``, then the ``REQUESTS_PROXIES`` setting.
    """
    url = 'http://example.net/whatever'

    def sent_proxies():
        # ``proxies`` keyword of the most recent mocked call (None if absent).
        return mocked_get.call_args[1].get('proxies')

    mocked_get.return_value = endpoint_response
    logger = logging.getLogger('requests')

    # no resource, no global setting
    Request(logger=logger).get(url)
    assert sent_proxies() is None
    Request(logger=logger).get(url, proxies={'http': 'http://proxy'})
    assert sent_proxies() == {'http': 'http://proxy'}

    # no resource, global setting
    with override_settings(REQUESTS_PROXIES={'http': 'http://globalproxy'}):
        Request(logger=logger).get(url)
        assert sent_proxies() == {'http': 'http://globalproxy'}
        Request(logger=logger).get(url, proxies={'http': 'http://proxy'})
        assert sent_proxies() == {'http': 'http://proxy'}

    # with a linked resource
    resource = MockResource()
    request = Request(resource=resource, logger=logger)
    request.get(url)
    assert 'proxies' not in mocked_get.call_args[1]

    resource.http_proxy = 'http://resourceproxy'
    resource_proxies = {'http': 'http://resourceproxy', 'https': 'http://resourceproxy'}
    request.get(url)
    assert sent_proxies() == resource_proxies
    request.get(url, proxies={'http': 'http://proxy'})
    assert sent_proxies() == {'http': 'http://proxy'}

    with override_settings(REQUESTS_PROXIES={'http': 'http://globalproxy'}):
        # the resource proxy takes precedence over the global one
        request.get(url)
        assert sent_proxies() == resource_proxies
        request.get(url, proxies={'http': 'http://proxy'})
        assert sent_proxies() == {'http': 'http://proxy'}

        # without a resource proxy the global one is used again
        resource.http_proxy = None
        request.get(url)
        assert sent_proxies() == {'http': 'http://globalproxy'}
        request.get(url, proxies={'http': 'http://proxy'})
        assert sent_proxies() == {'http': 'http://proxy'}
@mock.patch('passerelle.utils.RequestSession.request')
def test_resource_auth(mocked_get, caplog, endpoint_response):
    """Check which ``auth`` value reaches the underlying session request.

    As asserted below: an explicit ``auth=`` argument (even ``None``) overrides
    the basic-auth credentials configured on the resource.
    """
    url = 'http://example.net/whatever'

    def sent_auth():
        # ``auth`` keyword of the most recent mocked call (None if absent).
        return mocked_get.call_args[1].get('auth')

    mocked_get.return_value = endpoint_response
    resource = MockResource()
    request = Request(resource=resource, logger=logging.getLogger('requests'))

    # resource without credentials
    request.get(url)
    assert 'auth' not in mocked_get.call_args[1]
    request.get(url, auth=('user', 'pass'))
    assert sent_auth() == ('user', 'pass')

    # resource with credentials
    resource.basic_auth_username = 'ruser'
    resource.basic_auth_password = 'rpass'
    request.get(url)
    assert sent_auth() == ('ruser', 'rpass')
    request.get(url, auth=('user', 'pass'))
    assert sent_auth() == ('user', 'pass')
    request.get(url, auth=None)
    assert sent_auth() is None
@mock.patch('passerelle.utils.RequestSession.send')
def test_resource_hawk_auth(mocked_send, caplog, endpoint_response):
    """``HawkAuth`` must produce the same Authorization header as ``mohawk.Sender``.

    Checked for a GET without body and for a JSON POST with an ``ext`` value.
    """

    def header_fields(header):
        # Skip the first 5 characters (presumably the "Hawk " scheme prefix),
        # then turn the comma separated ``key=value`` items into a dict so the
        # comparison does not depend on their order.
        return dict([tuple(item.strip().split('=', 1)) for item in header[5:].split(',')])

    mocked_send.return_value = endpoint_response
    credentials = {'id': 'id', 'key': 'key', 'algorithm': 'sha256'}
    request = Request(resource=MockResource(), logger=logging.getLogger('requests'))

    # GET, no content
    hawk_auth = HawkAuth(**credentials)
    request.get('http://httpbin.org/get', auth=hawk_auth)
    prepared = mocked_send.call_args[0][0]
    assert 'Authorization' in prepared.headers
    generated_header = prepared.headers['Authorization']
    expected_header = mohawk.Sender(
        credentials,
        nonce=hawk_auth.nonce,
        _timestamp=hawk_auth.timestamp,
        url='http://httpbin.org/get',
        method='GET',
        content_type='',
        content='',
    ).request_header
    # compare generated header elements
    assert header_fields(generated_header) == header_fields(expected_header)

    # POST with a JSON body and an extra attribute
    hawk_auth = HawkAuth(ext='extra attribute', **credentials)
    request.post('http://httpbin.org/post', auth=hawk_auth, json={'key': 'value'})
    prepared = mocked_send.call_args[0][0]
    assert 'Authorization' in prepared.headers
    generated_header = prepared.headers['Authorization']
    expected_header = mohawk.Sender(
        credentials,
        nonce=hawk_auth.nonce,
        _timestamp=hawk_auth.timestamp,
        url='http://httpbin.org/post',
        method='POST',
        content_type='application/json',
        content='{"key": "value"}',
        ext='extra attribute',
    ).request_header
    assert header_fields(generated_header) == header_fields(expected_header)
@mock.patch('passerelle.utils.RequestSession.request')
def test_resource_certificates(mocked_get, caplog, endpoint_response):
    """Check the ``verify`` and ``cert`` values passed to the session request.

    Covers the resource attributes, the
    ``REQUESTS_IGNORE_HTTPS_CERTIFICATE_ERRORS`` setting and explicit
    per-call arguments.
    """
    url = 'http://example.net/whatever'

    def last_kwargs():
        # Keyword arguments of the most recent mocked call.
        return mocked_get.call_args[1]

    mocked_get.return_value = endpoint_response
    resource = MockResource()
    request = Request(resource=resource, logger=logging.getLogger('requests'))

    # defaults: verification on, no client certificate
    request.get(url)
    assert last_kwargs().get('verify') is True
    assert 'cert' not in last_kwargs()

    # the ignore list only applies to the listed host
    with override_settings(REQUESTS_IGNORE_HTTPS_CERTIFICATE_ERRORS=['example.com']):
        request.get(url)
        assert last_kwargs().get('verify') is True
    with override_settings(REQUESTS_IGNORE_HTTPS_CERTIFICATE_ERRORS=['example.net']):
        request.get(url)
        assert last_kwargs().get('verify') is False

    # verification disabled on the resource
    resource.verify_cert = False
    request.get(url)
    assert last_kwargs().get('verify') is False

    # a CA bundle on the resource becomes the ``verify`` value
    resource.trusted_certificate_authorities = MockFileField('/ca.pem')
    request.get(url)
    assert last_kwargs().get('verify') == '/ca.pem'
    assert 'cert' not in last_kwargs()
    request.get(url, verify=False)
    assert last_kwargs().get('verify') is False

    # a client certificate on the resource becomes the ``cert`` value
    resource.client_certificate = MockFileField('/client.pem')
    request.get(url)
    assert last_kwargs().get('cert') == '/client.pem'
    assert last_kwargs().get('verify') == '/ca.pem'
    request.get(url, cert='/local.pem', verify=False)
    assert last_kwargs().get('cert') == '/local.pem'
    assert last_kwargs().get('verify') is False
@mock.patch('passerelle.utils.RequestSession.request')
def test_requests_cache(mocked_get, caplog):
    """Exercise the ``cache_duration`` / ``invalidate_cache`` request options.

    Cache hits are detected through the call count of the mocked session
    request.
    """
    base_url = 'http://cache.example.org/'
    response_request = mock.Mock(headers={'Accept': '*/*'}, body=None)

    def fake(content, status_code=200, headers=None):
        # Build a FakedResponse; plain-text headers unless others are given.
        if headers is None:
            headers = {'Content-Type': 'text/plain; charset=charset=utf-8'}
        return FakedResponse(
            headers=headers,
            request=response_request,
            content=content,
            status_code=status_code,
        )

    resource = MockResource()
    request = Request(resource=resource, logger=logging.getLogger('requests'))
    mocked_get.return_value = fake(b'hello world')

    # by default there is no cache
    for _ in range(2):
        assert request.get(base_url).content == b'hello world'
    assert mocked_get.call_count == 2

    # add some cache
    mocked_get.reset_mock()
    assert request.get(base_url, cache_duration=15).content == b'hello world'
    assert mocked_get.call_count == 1
    assert request.get(base_url, cache_duration=15).content == b'hello world'
    assert mocked_get.call_count == 1  # got a cached response

    # value changed
    mocked_get.return_value = fake(b'hello second world')
    assert request.get(base_url, cache_duration=15).content == b'hello world'
    assert mocked_get.call_count == 1

    # force cache invalidation
    assert request.get(base_url, invalidate_cache=True).content == b'hello second world'
    assert mocked_get.call_count == 2

    # do not cache errors
    mocked_get.return_value = fake(b'no such world', status_code=404)
    mocked_get.reset_mock()
    missing_url = 'http://cache.example.org/404'
    response = request.get(missing_url, cache_duration=15)
    assert response.content == b'no such world'
    assert response.status_code == 404
    assert mocked_get.call_count == 1
    request.get(missing_url, cache_duration=15)
    assert mocked_get.call_count == 2

    # check response headers
    mocked_get.reset_mock()
    mocked_get.return_value = fake(
        b'hello world', headers=CaseInsensitiveDict({'Content-Type': 'image/png'})
    )
    image_url = 'http://cache.example.org/img'
    first = request.get(image_url, cache_duration=15)
    assert first.headers.get('content-type') == 'image/png'
    assert mocked_get.call_count == 1
    second = request.get(image_url, cache_duration=15)
    assert second.headers.get('content-type') == 'image/png'
    assert mocked_get.call_count == 1  # got a cached response
@mock.patch('passerelle.utils.RequestSession.request')
def test_timeout(mocked_get, caplog, endpoint_response):
    """Check the ``timeout`` value passed to the underlying session request.

    As asserted below: 25 by default, overridable by the ``REQUESTS_TIMEOUT``
    setting, the resource ``requests_timeout``, the ``Request`` constructor
    and finally the per-call argument (``None`` included).
    """
    url = 'http://example.net/whatever'

    def sent_timeout():
        # ``timeout`` keyword of the most recent mocked call.
        return mocked_get.call_args[1]['timeout']

    mocked_get.return_value = endpoint_response
    logger = logging.getLogger('requests')

    # default value and explicit per-call values
    Request(logger=logger).get(url)
    assert sent_timeout() == 25
    Request(logger=logger).get(url, timeout=42)
    assert sent_timeout() == 42
    Request(logger=logger).get(url, timeout=None)
    assert sent_timeout() is None

    # global setting
    with override_settings(REQUESTS_TIMEOUT=57):
        Request(logger=logger).get(url)
        assert sent_timeout() == 57
        Request(logger=logger).get(url, timeout=42)
        assert sent_timeout() == 42
        Request(logger=logger).get(url, timeout=None)
        assert sent_timeout() is None

    # resource level, constructor level, then call level
    mock_resource = MockResource()
    mock_resource.requests_timeout = 58
    Request(resource=mock_resource, logger=logger).get(url)
    assert sent_timeout() == 58
    Request(resource=mock_resource, logger=logger, timeout=59).get(url)
    assert sent_timeout() == 59
    Request(resource=mock_resource, logger=logger, timeout=59).get(url, timeout=60)
    assert sent_timeout() == 60
def test_log_http_request(caplog):
    """``log_http_request`` copes with a response header holding undecodable bytes.

    The ``b'\\xe9'`` header value is expected to be logged as the U+FFFD
    replacement character.
    """

    @urlmatch()
    def bad_headers(url, request):
        return response(200, 'coin', headers={'Error Webservice': b'\xe9'}, request=request)

    with HTTMock(bad_headers):
        resp = requests.get('https://example.com/', timeout=10)

    caplog.set_level(logging.DEBUG)
    assert len(caplog.records) == 0
    log_http_request(logging.getLogger(), resp.request, resp)
    assert len(caplog.records) == 1

    # Keep only the request_*/response_* attributes attached to the record.
    extra = {}
    for key, value in caplog.records[0].__dict__.items():
        if key.startswith(('request_', 'response_')):
            extra[key] = value

    # The User-Agent carries a library version: drop it before comparing.
    extra['request_headers'].pop('User-Agent')
    expected = {
        'request_headers': {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive',
        },
        'request_url': 'https://example.com/',
        'response_headers': {'Error Webservice': '\ufffd'},
        'response_status': 200,
    }
    assert extra == expected
@mock.patch('urllib3.connectionpool.HTTPConnectionPool._make_request')
def test_requests_max_retries(mocked_make_request, caplog):
    """A resource ``requests_max_retries`` of 3 leads to 4 low-level attempts."""
    retry_config = {
        'total': 3,
        'backoff_factor': 0.1,
    }
    mock_resource = MockResource()
    mock_resource.requests_max_retries = retry_config

    # Every low-level attempt fails with a read timeout.
    mocked_make_request.side_effect = ReadTimeoutError(pool=None, url=None, message=None)
    client = Request(resource=mock_resource, logger=logging.getLogger('requests'))

    with pytest.raises(requests.ConnectionError):
        client.get('http://example.net/whatever')
    assert mocked_make_request.call_count == 4  # 1 base request + 3 retries
@mock.patch('urllib3.connectionpool.HTTPConnectionPool._make_request')
def test_http_max_retries_global(mocked_make_request, settings):
    """The ``REQUESTS_MAX_RETRIES`` setting enables retries for every resource.

    Without it a read timeout is raised after a single attempt; with
    ``total: 3`` the request is attempted 4 times before failing.
    """
    url = 'http://example.net/whatever'
    logger = logging.getLogger('requests')
    mock_resource = MockResource()
    mocked_make_request.side_effect = ReadTimeoutError(pool=None, url=None, message=None)

    # no retry configured: one attempt only
    client = Request(resource=mock_resource, logger=logger)
    with pytest.raises(requests.ReadTimeout):
        client.get(url)
    assert mocked_make_request.call_count == 1

    # Start over with an empty adapter registry and a fresh call counter
    # before changing the retry configuration.
    Request.ADAPTER_REGISTRY.clear()
    mocked_make_request.reset_mock()
    retry_config = {
        'total': 3,
        'read': None,
        'backoff_factor': 0.1,
    }
    settings.REQUESTS_MAX_RETRIES = retry_config

    client = Request(resource=mock_resource, logger=logger)
    with pytest.raises(requests.ConnectionError):
        client.get(url)
    assert mocked_make_request.call_count == 4
@responses.activate
def test_requests_to_legacy_urls(log_level):
    """A request sent to ``old.org`` ends up on ``new.org``.

    Only the ``new.org`` URL is registered with ``responses``.
    NOTE(review): the old -> new mapping is presumably configured in the test
    settings; it is not visible in this file.
    """
    responses.add(responses.GET, 'https://new.org/foobar', json={'foo': 'bar'}, status=200)

    logger = logging.getLogger('requests')
    logger.setLevel(log_level)
    session = Request(logger=logger)

    resp = session.get('https://old.org/foobar')
    assert resp.json() == {'foo': 'bar'}
    assert resp.request.url == 'https://new.org/foobar'
@responses.activate
def test_requests_substitution(settings):
    """Check the ``requests_substitutions`` connector setting on response bodies.

    The scenarios below cover HTML, XML and JSON bodies (substituted), binary
    or untyped bodies (left untouched), a non-matching ``url``, a substitution
    without ``url`` and a resource with another connector slug.
    """
    from passerelle.base.models import BaseResource

    # Mocked resource whose settings lookups are delegated to the real
    # BaseResource implementation.
    resource = mock.Mock()
    resource.requests_max_retries = {}
    resource.slug = 'test'
    resource.get_connector_slug.return_value = 'cmis'
    resource.get_settings = lambda: BaseResource.get_settings(resource)
    resource.get_setting = lambda name: BaseResource.get_setting(resource, name)
    requests = Request(logger=logging.getLogger(), resource=resource)
    # NOTE(review): the 'cmis/test' key presumably reads
    # '<connector slug>/<resource slug>' -- confirm in BaseResource.get_settings.
    settings.CONNECTORS_SETTINGS = {
        'cmis/test': {
            'requests_substitutions': [
                {
                    'url': 'https://example.com/',
                    'search': 'http://example.internal',
                    'replace': 'https://example.com',
                }
            ]
        }
    }
    # HTML body: the internal URL is replaced
    responses.add(
        responses.GET,
        'https://example.com/html',
        content_type='text/html',
        body=b'<html>\n<a href="http://example.internal/path/">\n<a/></html>',
        status=200,
    )
    assert (
        requests.get('https://example.com/html?bar=foo', params={'foo': 'bar'}).text
        == '<html>\n<a href="https://example.com/path/">\n<a/></html>'
    )
    # XML body: the internal URL is replaced
    responses.add(
        responses.GET,
        'https://example.com/xml',
        content_type='application/xml',
        body=b'<a href="http://example.internal/path/"><a/>',
        status=200,
    )
    assert requests.get('https://example.com/xml').text == '<a href="https://example.com/path/"><a/>'
    # check substitution is applied inside JSON, even if some characters are escaped
    responses.add(
        responses.GET,
        'https://example.com/json',
        content_type='application/json',
        body=b'{"url": "http:\\/\\/example.internal/path/"}',
        status=200,
    )
    assert requests.get('https://example.com/json').json() == {'url': 'https://example.com/path/'}
    # application/octet-stream body: content is returned unchanged
    responses.add(
        responses.GET,
        'https://example.com/binary',
        content_type='application/octet-stream',
        body=b'\00<a href="http://example.internal/path/"><a/>',
        status=200,
    )
    assert (
        requests.get('https://example.com/binary').content
        == b'\00<a href="http://example.internal/path/"><a/>'
    )
    # empty content type: content is returned unchanged as well
    responses.add(
        responses.GET,
        'https://example.com/binary2',
        content_type='',
        body=b'\00<a href="http://example.internal/path/"><a/>',
        status=200,
    )
    assert (
        requests.get('https://example.com/binary2').content
        == b'\00<a href="http://example.internal/path/"><a/>'
    )
    responses.add(
        responses.GET,
        'https://example2.com/html',
        content_type='text/html',
        body=b'<html>\n<a href="http://example.internal/path/">\n<a/></html>',
        status=200,
    )
    # wrong hostname
    assert (
        requests.get('https://example2.com/html?query=1').text
        == '<html>\n<a href="http://example.internal/path/">\n<a/></html>'
    )
    # check that url field is optional
    settings.CONNECTORS_SETTINGS = {
        'cmis/test': {
            'requests_substitutions': [
                {
                    'search': 'http://example.internal',
                    'replace': 'https://example.com',
                }
            ]
        }
    }
    responses.add(
        responses.GET,
        'https://whatever.com/html',
        content_type='text/html',
        body=b'<html>\n<a href="http://example.internal/path/">\n<a/></html>',
        status=200,
    )
    assert (
        requests.get('https://whatever.com/html?bar=foo', params={'foo': 'bar'}).text
        == '<html>\n<a href="https://example.com/path/">\n<a/></html>'
    )
    # check setting is applied per connector slug
    resource.get_connector_slug.return_value = 'pas-cmis'
    requests = Request(logger=logging.getLogger(), resource=resource)
    responses.add(
        responses.GET,
        'https://example.com/html',
        content_type='text/html',
        body=b'<html>\n<a href="http://example.internal/path/">\n<a/></html>',
        status=200,
    )
    # no 'pas-cmis/test' entry exists in the settings: body is left untouched
    assert (
        requests.get('https://example.com/html?bar=foo', params={'foo': 'bar'}).text
        == '<html>\n<a href="http://example.internal/path/">\n<a/></html>'
    )
@responses.activate(registry=OrderedRegistry)  # pylint: disable=unexpected-keyword-arg,no-value-for-parameter
def test_requests_resource_down():
    """Check logging and retry behaviour depending on ``resource.down()``.

    The same URL is registered several times; with ``OrderedRegistry`` the
    registered responses are expected to be served in registration order, so
    the order of the statements below matters.
    """
    from passerelle.base.models import BaseResource

    # Mocked resource whose settings lookups are delegated to the real
    # BaseResource implementation; ``down()`` is controlled by the test.
    resource = mock.Mock()
    resource.requests_max_retries = {}
    resource.slug = 'test'
    resource.get_connector_slug.return_value = 'cmis'
    resource.get_settings = lambda: BaseResource.get_settings(resource)
    resource.get_setting = lambda name: BaseResource.get_setting(resource, name)
    resource.down = mock.Mock(return_value=False)
    logger = mock.Mock()
    requests = Request(resource=resource, logger=logger)
    # resource not down: the connection failure is reported with logger.error
    responses.add(
        responses.GET,
        'https://example.com/exception',
        body=ConnectionError('down'),
    )
    with pytest.raises(ConnectionError):
        requests.get('https://example.com/exception')
    assert logger.error.call_count == 1
    assert logger.info.call_count == 0
    # resource already down: the same failure is only reported with logger.info
    responses.add(
        responses.GET,
        'https://example.com/exception',
        body=ConnectionError('down'),
    )
    logger = mock.Mock()
    resource.down.return_value = True
    requests = Request(resource=resource, logger=logger)
    with pytest.raises(ConnectionError):
        requests.get('https://example.com/exception')
    assert logger.error.call_count == 0
    assert logger.info.call_count == 1
    # a 500 followed by a success, with retries on status 500 enabled
    responses.add(
        responses.GET,
        'https://example.com/exception',
        body='Error',
        status=500,
    )
    responses.add(
        responses.GET,
        'https://example.com/exception',
        body='ok',
    )
    resource.down.return_value = False
    resource.requests_max_retries = {
        'total': 3,
        'backoff_factor': 0.001,
        'status_forcelist': [500],
        'connect': 1,
        'read': 1,
    }
    requests = Request(resource=resource, logger=logger)
    # resource not down: the 500 is retried and the second response is returned
    assert requests.get('https://example.com/exception').text == 'ok'
    responses.add(
        responses.GET,
        'https://example.com/exception',
        body='Error',
        status=500,
    )
    resource.down.return_value = True
    requests = Request(resource=resource, logger=logger)
    # resource down: the 500 response is returned to the caller
    # NOTE(review): presumably retries are skipped for a down resource -- confirm
    assert requests.get('https://example.com/exception').status_code == 500