workflows: do not feed nul bytes to FTS (#88716) #1318

Merged
fpeters merged 1 commits from wip/88716-fts-nul-byte into main 2024-03-29 08:33:59 +01:00
2 changed files with 21 additions and 1 deletions

View File

@ -607,3 +607,22 @@ def test_register_comment_to_with_attachment(pub):
assert 'to-role.txt' in display_parts()[2]
assert 'to-submitter.txt' in display_parts()[4]
assert 'to-role-or-submitter.txt' in display_parts()[6]
def test_register_comment_fts(pub):
    """Check that a registered comment is stored as-is but cleaned for FTS.

    A comment containing a NUL character is registered on a freshly stored
    form; the stored part content must still carry it, while the text
    returned by ``render_for_fts()`` must not.
    """
    pub.substitutions.feed(MockSubstitutionVariables())

    # minimal form definition, without any field
    form_definition = FormDef()
    form_definition.name = 'baz'
    form_definition.fields = []
    form_definition.store()

    data_class = form_definition.data_class()
    formdata = data_class()
    formdata.just_created()
    formdata.store()

    # register a comment embedding a NUL character and a line break
    commenter = RegisterCommenterWorkflowStatusItem()
    commenter.comment = 'Hello\x00\nworld'
    commenter.perform(formdata)

    last_part = formdata.evolution[-1].parts[-1]
    # the stored HTML content is kept untouched
    assert last_part.content == '<p>Hello\x00\nworld</p>'
    # the full-text-search rendering does not keep the NUL character
    assert last_part.render_for_fts() == 'Hello world'

View File

@ -360,7 +360,8 @@ class EvolutionPart:
if not self.view or self.to:
# don't include parts with no content or restricted visibility
return ''
return misc.html2text(self.view() or '')
illegal_fts_chars = re.compile(r'[\x00-\x1F]')

Ok, on dégage les trente et un premiers code points qui correspondent à des caractères de contrôle.

Edit: en fait les trente-deux premiers :)

Ok, on dégage les trente et un premiers _code points_ qui correspondent à des caractères de contrôle. Edit: en fait les trente-deux premiers :)

Je me suis laissé emporter :) J'ai renommé en "workflows: do not feed ascii control characters to FTS ".

Je me suis laissé emporter :) J'ai renommé en "workflows: do not feed ascii control characters to FTS ".

Je me suis laissé emporter :) J'ai renommé en "workflows: do not feed ascii control characters to FTS ".

Top, merci.

> Je me suis laissé emporter :) J'ai renommé en "workflows: do not feed ascii control characters to FTS ".

Top, merci.
return illegal_fts_chars.sub(' ', misc.html2text(self.view() or ''))
class AttachmentEvolutionPart(EvolutionPart):