emails: ignore unexpected title overlines or transitions (#38817)

2020-01-11 11:06:31 +01:00 · 2020-01-11 11:06:31 +01:00 · bbaa2d18be
parent 08e085d5b9
commit bbaa2d18be
2 changed files with 51 additions and 2 deletions
--- a/tests/test_misc.py
+++ b/tests/test_misc.py
@ -460,6 +460,32 @@ M. Francis Kuntz
    assert b'arabic simple' in html
    assert b'M. Francis Kuntz' in html

+@pytest.mark.skipif('docutils is None')
+def test_email_with_unexpected_transition(emails):
+    pub = create_temporary_pub()
+    mail_body = '''
+Value:
+ A
+
+Other value:
+ ?????????
+
+Plop:
+ C
+
+bye,
+'''
+    send_email('test', mail_body=mail_body, email_rcpt='test@localhost')
+    assert emails.count() == 1
+    assert emails.emails['test']['msg'].is_multipart()
+    assert emails.emails['test']['msg'].get_content_subtype() == 'alternative'
+    assert emails.emails['test']['msg'].get_payload()[0].get_content_type() == 'text/plain'
+    assert emails.emails['test']['msg'].get_payload()[1].get_content_type() == 'text/html'
+    text = emails.emails['test']['msg'].get_payload()[0].get_payload(decode=True)
+    html = emails.emails['test']['msg'].get_payload()[1].get_payload(decode=True)
+    assert text.count(b'\n ?????????\n') == 1
+    assert html.count(b'<dd>?????????</dd>') == 1
+
 def test_dict_from_prefix():
    d = evalutils.dict_from_prefix('var1', {})
    assert d == {}
--- a/wcs/qommon/emails.py
+++ b/wcs/qommon/emails.py
@ -60,19 +60,42 @@ except ImportError:


 if docutils:
-    # custom parser to only allow arabic sequences, this prevents the rst
-    # parser to consider M. as starting a (upper alpha / roman) sequence.
+    from docutils import statemachine
+
    class Body(docutils.parsers.rst.states.Body):
        def is_enumerated_list_item(self, ordinal, sequence, format):
+            # customised to only allow arabic sequences; this prevents the rst
+            # parser from considering M. as starting an (upper alpha / roman) sequence.
            if format == 'period' and sequence != 'arabic':
                return False
            return docutils.parsers.rst.states.Body.is_enumerated_list_item(
                    self, ordinal, sequence, format)

+        def line(self, match, context, next_state):
+            # customised to ignore unexpected overlines or transitions (caused,
+            # for example, by a field filled with question marks).
+            if self.state_machine.match_titles:
+                return [match.string], 'Line', []
+            elif match.string.strip() == '::':
+                raise statemachine.TransitionCorrection('text')
+            else:
+                # Unexpected possible title overline or transition.
+                # Treating it as ordinary text.
+                raise statemachine.TransitionCorrection('text')
+
    class CustomRstParser(docutils.parsers.rst.Parser):
        def __init__(self, *args, **kwargs):
            docutils.parsers.rst.Parser.__init__(self, *args, **kwargs)
            self.state_classes = tuple([Body] + list(self.state_classes[1:]))
+            docutils.parsers.rst.states.state_classes = self.state_classes
+
+    def custom_rststate_init(self, state_machine, debug=False):
+        state_classes = tuple([Body] + list(docutils.parsers.rst.states.state_classes[1:]))
+        self.nested_sm_kwargs = {'state_classes': state_classes,
+                                 'initial_state': 'Body'}
+        docutils.parsers.rst.states.StateWS.__init__(self, state_machine, debug)
+
+    docutils.parsers.rst.states.RSTState.__init__ = custom_rststate_init


 def custom_template_email(key, mail_body_data, email_rcpt, **kwargs):