# -*- coding: utf-8 -*-
# Origin:  commit 09de4122cf7caa91612867479e9a6b04a4dd67bf
# Author:  Frédéric Péters
# Date:    Tue, 3 Dec 2013 11:03:22 +0100
# Subject: add custom indexer to also include thesaurus terms (#3675)
# Path:    src/pfwbged/basecontent/indexers.py
"""SearchableText indexer for pfwbged documents.

The indexed text is built from the document title, the text/plain rendering
of its attached files and the titles of its thesaurus keywords.
"""
import logging

from ZODB.POSException import ConflictError
from five import grok
from Products.CMFCore.utils import getToolByName
from plone.indexer import indexer

from zope.component import getUtility
from zope.schema.interfaces import IVocabularyFactory


from .behaviors import IPfwbDocument

logger = logging.getLogger(__name__)

# portal types whose attached file is converted to text and indexed
_INDEXED_FILE_TYPES = ('dmsmainfile', 'dmsappendixfile')


def _file_as_text(transforms, child):
    """Return the text/plain rendering of ``child.file`` as unicode, or None.

    ``ConflictError`` and ``KeyboardInterrupt`` are re-raised untouched; any
    other failure is logged and reported as ``None`` so that one broken
    attachment does not prevent the rest of the document from being indexed.
    """
    attachment = child.file
    try:
        datastream = transforms.convertTo(
            'text/plain', attachment.data, mimetype=attachment.contentType,
            filename=attachment.filename)
        if datastream is None:
            # the transform engine gave up on this file
            return None
        text = datastream.getData()
        if not isinstance(text, unicode):
            text = unicode(text, 'utf-8')
        return text
    except (ConflictError, KeyboardInterrupt):
        raise
    except Exception:
        logger.exception(
            'could not convert %r to text/plain for indexing', child)
        return None


def _keyword_titles(obj):
    """Return the thesaurus term titles for the keywords set on ``obj``.

    Returns an empty list when ``obj`` has no ``keywords`` attribute or when
    it is empty/None.  Keywords unknown to the 'dms.thesaurus.simple'
    vocabulary are skipped.
    """
    keywords = getattr(obj, 'keywords', None)
    if not keywords:
        return []

    factory = getUtility(IVocabularyFactory, 'dms.thesaurus.simple')
    vocabulary = factory(obj)

    titles = []
    for keyword in keywords:
        try:
            term = vocabulary.getTerm(keyword)
        except LookupError:
            # keyword is not (or no longer) part of the thesaurus
            continue
        if term.title:
            titles.append(term.title)
    return titles


@indexer(IPfwbDocument)
def document_dynamic_searchable_text_indexer(obj):
    """Build the SearchableText value for a document.

    The result is a single unicode string joining, with spaces:

    * the document title (when set);
    * the text/plain conversion of the first 'dmsmainfile' met when walking
      the children in reverse order, and of every 'dmsappendixfile' -- files
      that are empty or have no transform path to text/plain are ignored;
    * the titles of the thesaurus terms matching ``obj.keywords``.
    """
    title = obj.title
    indexed_elements = [title] if title else []

    transforms = getToolByName(obj, 'portal_transforms')

    had_version = False
    for child in reversed(obj.values()):
        if child.portal_type not in _INDEXED_FILE_TYPES:
            continue

        if not child.file or child.file.getSize() == 0:
            continue

        # if there is no path to text/plain, do nothing
        if not transforms._findPath(child.file.contentType, 'text/plain'):
            continue

        # only index the latest version; appendix files are not versions
        # and must not use up the "latest version" slot
        if child.portal_type == 'dmsmainfile':
            if had_version:
                continue
            had_version = True

        text = _file_as_text(transforms, child)
        if text:
            indexed_elements.append(text)

    indexed_elements.extend(_keyword_titles(obj))

    return u' '.join(indexed_elements)


grok.global_adapter(document_dynamic_searchable_text_indexer,
                    name='SearchableText')