# src/pfwbged/basecontent/indexers.py
"""``SearchableText`` indexer for objects providing ``IPfwbDocument``."""
import logging

from five import grok
from plone.indexer import indexer
from Products.CMFCore.utils import getToolByName
from ZODB.POSException import ConflictError
from zope.component import getUtility
from zope.schema.interfaces import IVocabularyFactory

from .behaviors import IPfwbDocument

logger = logging.getLogger(__name__)


@indexer(IPfwbDocument)
def document_dynamic_searchable_text_indexer(obj):
    """Build the ``SearchableText`` value for a document.

    The result is the following pieces joined by single spaces:

    * the document title (an empty string when the title is unset);
    * the ``text/plain`` conversion of every contained ``dmsappendixfile``
      and of one ``dmsmainfile``: the last one, in container order, whose
      file is non-empty and has a transform path to ``text/plain``;
    * the title of the ``dms.thesaurus.simple`` vocabulary term of each
      keyword, when the object has a non-empty ``keywords`` attribute.

    A file whose conversion or UTF-8 decoding fails is logged and left out
    of the index instead of aborting the indexing of the whole document.
    ``ConflictError`` and ``KeyboardInterrupt`` are always propagated.
    Keywords that have no term in the vocabulary are skipped.

    :param obj: the document being indexed; must offer ``title`` and
        ``values()``.
    :returns: the unicode text to store in the index.
    """
    # ``or u''`` keeps the join below from failing on a ``None`` title.
    indexed_elements = [obj.title or u'']

    transforms = getToolByName(obj, 'portal_transforms')

    main_file_seen = False
    # Walk the children last-to-first so that the first usable main file
    # met is the most recent one (assumes container order is creation
    # order -- TODO confirm).
    for child in reversed(obj.values()):
        if child.portal_type not in ('dmsmainfile', 'dmsappendixfile'):
            continue

        is_main_file = child.portal_type == 'dmsmainfile'
        # only index the latest version
        if is_main_file and main_file_seen:
            continue

        blob = child.file
        if not blob or blob.getSize() == 0:
            continue

        # if there is no path to text/plain, do nothing
        if not transforms._findPath(blob.contentType, 'text/plain'):
            continue

        # Only a main file may close the "latest version" slot; appendix
        # files must never prevent the main file from being indexed.
        if is_main_file:
            main_file_seen = True

        # convert it to text/plain
        try:
            datastream = transforms.convertTo(
                'text/plain', blob.data, mimetype=blob.contentType,
                filename=blob.filename)
            if datastream is None:
                # No conversion result: nothing to index for this file.
                continue
            text = unicode(datastream.getData(), 'utf-8')
        except (ConflictError, KeyboardInterrupt):
            raise
        except Exception:
            logger.exception(
                'Could not convert %r to text/plain for indexing', child)
            continue
        indexed_elements.append(text)

    keywords = obj.keywords if hasattr(obj, 'keywords') else None
    if keywords:
        factory = getUtility(IVocabularyFactory, 'dms.thesaurus.simple')
        vocabulary = factory(obj)
        for keyword in keywords:
            try:
                term = vocabulary.getTerm(keyword)
            except LookupError:
                # The keyword has no term in the thesaurus (any more).
                continue
            if term.title is not None:
                indexed_elements.append(term.title)

    return u' '.join(indexed_elements)


grok.global_adapter(document_dynamic_searchable_text_indexer,
                    name='SearchableText')