# Patch summary (unified git diff touching five files):
#   * data/webbots: drops the "Wget" entry from the bot user-agent list.
#   * tests/form_pages/test_all.py: adds test_form_draft_from_prefill, which
#     asserts that a plain GET of /test/ stores no formdata, that a GET with a
#     prefilled field (?test=hello) stores exactly one, and that the same GET
#     stores none when the User-agent is "python-requests/0" or "Googlebot".
#   * wcs/forms/root.py: removes the BotFilter import; the tracking-code
#     check now calls get_request().is_from_bot(), and the prefill draft is
#     only saved when has_draft_support() is true and the request is neither
#     from an application nor from a bot.
#   * wcs/qommon/http_request.py: adds HTTPRequest.is_from_bot(), which
#     imports BotFilter inside the method and returns
#     BotFilter.is_bot(request=self), and HTTPRequest.is_from_application(),
#     which is true when HTTP_USER_AGENT starts with "python-requests",
#     "curl" or "Wget".
#   * wcs/qommon/logger.py: BotFilter.is_bot() gains an optional `request`
#     argument (default None) and falls back to get_request() when it is None;
#     the user agent is then read from that request object.
# NOTE(review): the diff/hunk markers appear mid-line below, so the patch's
# original line breaks seem to have been collapsed into spaces; the line
# structure would presumably have to be restored before `git apply` can
# consume it - TODO confirm against the original commit.
diff --git a/data/webbots b/data/webbots index 408fee26d..e175334b5 100644 --- a/data/webbots +++ b/data/webbots @@ -13,7 +13,6 @@ AltaVista-Intranet jan.gelin@av.com FAST-WebCrawler crawler@fast.no -Wget Acoon Robot antibot Atomz diff --git a/tests/form_pages/test_all.py b/tests/form_pages/test_all.py index f7d0f1f44..0a1b5110d 100644 --- a/tests/form_pages/test_all.py +++ b/tests/form_pages/test_all.py @@ -2148,6 +2148,36 @@ def test_form_draft_with_date(pub): assert '2012-02-12' in resp.text +def test_form_draft_from_prefill(pub): + formdef = create_formdef() + formdef.fields = [ + fields.StringField(id='0', label='string'), + ] + formdef.enable_tracking_codes = True + formdef.store() + formdef.data_class().wipe() + + # no draft + resp = get_app(pub).get('/test/') + assert formdef.data_class().count() == 0 + formdef.data_class().wipe() + + # draft created if there's been some prefilled fields + formdef.fields[0].prefill = {'type': 'string', 'value': '{{request.GET.test|default:""}}'} + formdef.store() + resp = get_app(pub).get('/test/?test=hello') + assert formdef.data_class().count() == 1 + formdef.data_class().wipe() + + # unless the call was made from an application + resp = get_app(pub).get('/test/?test=hello', headers={'User-agent': 'python-requests/0'}) + assert formdef.data_class().count() == 0 + + # or a bot + resp = get_app(pub).get('/test/?test=hello', headers={'User-agent': 'Googlebot'}) + assert formdef.data_class().count() == 0 + + @pytest.mark.parametrize('tracking_code', [True, False]) def test_form_direct_draft_access(pub, tracking_code): user = create_user(pub) diff --git a/wcs/forms/root.py b/wcs/forms/root.py index 711e713ff..0fca2b681 100644 --- a/wcs/forms/root.py +++ b/wcs/forms/root.py @@ -43,7 +43,6 @@ from ..qommon import errors, get_cfg from ..qommon import misc, get_logger from ..qommon import template from ..qommon.form import * -from ..qommon.logger import BotFilter from ..qommon import emails from wcs.categories import Category @@ 
-172,7 +171,7 @@ class TrackingCodeDirectory(Directory): raise errors.TraversalError() if formdata.formdef.enable_tracking_codes is False: raise errors.TraversalError() - if BotFilter.is_bot(): + if get_request().is_from_bot(): raise errors.AccessForbiddenError() get_session().mark_anonymous_formdata(formdata) return redirect(formdata.get_url()) @@ -466,7 +465,7 @@ class FormPage(Directory, FormTemplateMixin): # include prefilled data transient_formdata = self.get_transient_formdata() transient_formdata.data.update(self.formdef.get_data(form)) - if self.has_draft_support(): + if self.has_draft_support() and not (req.is_from_application() or req.is_from_bot()): # save to get prefilling data in database self.save_draft(form_data) else: diff --git a/wcs/qommon/http_request.py b/wcs/qommon/http_request.py index a51e43c9c..326b0637c 100644 --- a/wcs/qommon/http_request.py +++ b/wcs/qommon/http_request.py @@ -184,6 +184,18 @@ class HTTPRequest(quixote.http_request.HTTPRequest): def is_in_frontoffice(self): return not (self.is_in_backoffice() or self.is_api_url()) + def is_from_bot(self): + from .logger import BotFilter + return BotFilter.is_bot(request=self) + + def is_from_application(self): + # detect calls made from other applications or debug tools + # this is not to detect bots (is_from_bot above) + user_agent = self.get_environ('HTTP_USER_AGENT', '') + return (user_agent.startswith('python-requests') or + user_agent.startswith('curl') or + user_agent.startswith('Wget')) + @property def META(self): return self.environ diff --git a/wcs/qommon/logger.py b/wcs/qommon/logger.py index 89ffc02d1..46ba4fb12 100644 --- a/wcs/qommon/logger.py +++ b/wcs/qommon/logger.py @@ -52,10 +52,12 @@ class BotFilter(logging.Filter): return 1 @classmethod - def is_bot(cls): + def is_bot(cls, request=None): + if request is None: + request = get_request() botfile = os.path.join(get_publisher().data_dir, 'webbots') - if os.path.exists(botfile) and get_request(): - user_agent = 
get_request().get_environ('HTTP_USER_AGENT', '') + if request and os.path.exists(botfile): + user_agent = request.get_environ('HTTP_USER_AGENT', '') with open(botfile) as fd: for bot_ua_string in [x.strip() for x in fd.readlines()]: if bot_ua_string in user_agent: