workflows: do not feed ascii control characters to FTS (#88716)

This commit is contained in:
Frédéric Péters 2024-03-26 16:54:57 +01:00
parent ee6d557f6e
commit 990dde7060
2 changed files with 21 additions and 1 deletions

View File

@@ -607,3 +607,22 @@ def test_register_comment_to_with_attachment(pub):
assert 'to-role.txt' in display_parts()[2]
assert 'to-submitter.txt' in display_parts()[4]
assert 'to-role-or-submitter.txt' in display_parts()[6]
def test_register_comment_fts(pub):
    """Check a control character is kept in the comment but not in its FTS text.

    A comment containing a NUL character is registered on a newly created
    formdata; the resulting evolution part must still carry the character in
    its content, while ``render_for_fts()`` must return text without it.
    """
    pub.substitutions.feed(MockSubstitutionVariables())

    # minimal form definition, without any field
    empty_formdef = FormDef()
    empty_formdef.name = 'baz'
    empty_formdef.fields = []
    empty_formdef.store()

    data = empty_formdef.data_class()()
    data.just_created()
    data.store()

    commenter = RegisterCommenterWorkflowStatusItem()
    commenter.comment = 'Hello\x00\nworld'
    commenter.perform(data)

    # the comment is the last part of the last evolution entry
    comment_part = data.evolution[-1].parts[-1]
    assert comment_part.content == '<p>Hello\x00\nworld</p>'  # kept
    assert comment_part.render_for_fts() == 'Hello world'  # not kept

View File

@@ -360,7 +360,8 @@ class EvolutionPart:
if not self.view or self.to:
# don't include parts with no content or restricted visibility
return ''
return misc.html2text(self.view() or '')
illegal_fts_chars = re.compile(r'[\x00-\x1F]')
return illegal_fts_chars.sub(' ', misc.html2text(self.view() or ''))
class AttachmentEvolutionPart(EvolutionPart):