combo/combo/apps/search/management/commands/update_index.py

79 lines
3.3 KiB
Python

# combo - content management system
# Copyright (C) 2017 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from django.utils.timezone import now
from haystack.management.commands.update_index import Command as UpdateIndexCommand
from combo.data.models import Page, ExternalLinkSearchItem
from combo.apps.search.models import SearchCell
class Command(UpdateIndexCommand):
    """Haystack ``update_index`` command preceded by external links collection.

    Unless told otherwise, links exposed by the visible cells of visible
    pages are stored as ExternalLinkSearchItem objects before the parent
    command runs.
    """

    def add_arguments(self, parser):
        """Add --skip-external-links-collection to the parent command options."""
        super(Command, self).add_arguments(parser)
        parser.add_argument(
            '--skip-external-links-collection',
            dest='skip_external_links_collection',
            action='store_true',
            default=False,
        )

    def handle(self, **options):
        """Collect external links (unless skipped), then run the parent command.

        Returns None without doing anything when
        SearchCell.get_cells_by_search_service('_text') yields no truthy
        value; otherwise returns whatever the parent ``handle`` returns.
        """
        text_search_cells = SearchCell.get_cells_by_search_service('_text')
        if not any(text_search_cells):
            # do not index site if there's no matching search cell
            return

        skip_collection = options.get('skip_external_links_collection', False)
        if not skip_collection:
            self.collect_external_links(options)

        return super(Command, self).handle(**options)

    def _iter_visible_external_links(self):
        """Yield the external link dicts of visible cells on visible pages.

        Only pages with an empty ``sub_slug`` are considered, and visibility
        is evaluated with ``user=None``.
        """
        for page in Page.objects.filter(sub_slug=''):
            if not page.is_visible(user=None):
                continue
            for cell in page.get_cells():
                if not cell.is_visible(user=None):
                    continue
                for link in cell.get_external_links_data():
                    yield link

    def collect_external_links(self, options):
        """Synchronise ExternalLinkSearchItem objects with links found in cells.

        ``options`` is the mapping of command options; only its 'remove'
        entry is read here and, when truthy, every existing item is deleted
        before collecting.
        """
        run_started_at = now()

        if options.get('remove'):
            ExternalLinkSearchItem.objects.all().delete()

        # assemble external links data, one entry per URL
        collected = {}
        for link in self._iter_visible_external_links():
            url = link['url']
            entry = collected.get(url)
            if entry is None:
                # first occurrence of that link: its title becomes the
                # title of the entry.
                entry = collected[url] = {
                    'title': link['title'],
                    'all_texts': [],
                }
            else:
                # link already known: the title of this occurrence is only
                # kept as text.
                entry['all_texts'].append(link['title'])
            # additional texts will be assembled and indexed
            entry['all_texts'].append(link.get('text') or '')

        # save data as ExternalLinkSearchItem objects
        for url, entry in collected.items():
            title = entry['title']
            item, _created = ExternalLinkSearchItem.objects.get_or_create(
                url=url, defaults={'title': title})
            item.title = title
            item.text = '\n'.join(entry['all_texts'])
            item.save()

        # remove obsolete objects, i.e. those whose last_update_timestamp is
        # older than the start of this run (presumably refreshed by save();
        # confirm against the model definition).
        stale_items = ExternalLinkSearchItem.objects.filter(
            last_update_timestamp__lt=run_started_at)
        stale_items.delete()