From 7b35ba0c7667365e47ed98d61d6337d45d6ef493 Mon Sep 17 00:00:00 2001 From: Dave Hall Date: Wed, 27 Sep 2017 10:24:45 +0100 Subject: [PATCH] Fixing escaping of <> characters in postgres. Closes #219 --- tests/test_watson/tests.py | 33 +++++---------------------------- watson/backends.py | 2 +- 2 files changed, 6 insertions(+), 29 deletions(-) diff --git a/tests/test_watson/tests.py b/tests/test_watson/tests.py index 82121a8..adbea5d 100644 --- a/tests/test_watson/tests.py +++ b/tests/test_watson/tests.py @@ -10,7 +10,7 @@ these tests have been amended to 'fooo' and 'baar'. Ho hum. from __future__ import unicode_literals import json -import re +import string try: from unittest import skipUnless @@ -26,7 +26,6 @@ from django.utils.encoding import force_text from watson import search as watson from watson.models import SearchEntry -from watson.backends import escape_query from test_watson.models import WatsonTestModel1, WatsonTestModel2 from test_watson import admin # Force early registration of all admin models. # noQA @@ -48,32 +47,6 @@ class RegistrationTest(TestCase): self.assertRaises(watson.RegistrationError, lambda: isinstance(watson.get_adapter(WatsonTestModel1))) -class EscapingTest(TestCase): - def testEscaping(self): - # Test query escaping. - re_escape_chars = re.compile(r'[&:"(|)!><~*+-]', re.UNICODE) - self.assertEqual(escape_query("", re_escape_chars), "") - self.assertEqual(escape_query("abcd", re_escape_chars), "abcd") - self.assertEqual(escape_query("abcd efgh", re_escape_chars), "abcd efgh") - self.assertEqual(escape_query("abcd efgh", re_escape_chars), "abcd efgh") - self.assertEqual(escape_query("&&abcd&", re_escape_chars), "abcd") - - # check if we leave good characters - good_chars = "'$@#$^=_.," - for char in good_chars: - self.assertEqual( - escape_query("abcd{}efgh".format(char), re_escape_chars), - "abcd{}efgh".format(char) - ) - - # now the ones where we replace harmful characters - bad_chars = '&:"(|)!><~*+-' - for char in bad_chars: - self.assertEqual( - escape_query("abcd{}efgh".format(char), re_escape_chars), "abcd efgh" - ) - - complex_registration_search_engine = watson.SearchEngine("restricted") @@ -285,6 +258,10 @@ class InternalsTest(SearchTestBase): class SearchTest(SearchTestBase): + def testEscaping(self): + # This must not crash the database with a syntax error. + list(watson.search(string.printable)) + def emptySearchTextGivesNoResults(self): self.assertEqual(watson.search("").count(), 0) self.assertEqual(watson.search(" ").count(), 0) diff --git a/watson/backends.py b/watson/backends.py index 369f7a5..3cd3a65 100644 --- a/watson/backends.py +++ b/watson/backends.py @@ -23,7 +23,7 @@ def regex_from_word(word): # PostgreSQL to_tsquery operators: ! & : ( ) | -RE_POSTGRES_ESCAPE_CHARS = re.compile(r'[&:(|)!]', re.UNICODE) +RE_POSTGRES_ESCAPE_CHARS = re.compile(r'[&:(|)!><]', re.UNICODE) # MySQL boolean full-text search operators: > < ( ) " ~ * + - RE_MYSQL_ESCAPE_CHARS = re.compile(r'["()><~*+-]', re.UNICODE)