79 lines
3.3 KiB
Python
79 lines
3.3 KiB
Python
# combo - content management system
|
|
# Copyright (C) 2017 Entr'ouvert
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify it
|
|
# under the terms of the GNU Affero General Public License as published
|
|
# by the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Affero General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
from django.utils.timezone import now
|
|
|
|
from haystack.management.commands.update_index import Command as UpdateIndexCommand
|
|
|
|
from combo.data.models import Page, ExternalLinkSearchItem
|
|
from combo.apps.search.models import SearchCell
|
|
|
|
|
|
class Command(UpdateIndexCommand):
    """Update the search index, collecting external links beforehand.

    Extends haystack's ``update_index`` command in two ways: nothing is
    indexed when no search cell is found for the ``_text`` search service,
    and, unless ``--skip-external-links-collection`` is given, the external
    links exposed by cells are stored as ``ExternalLinkSearchItem`` objects
    before the parent command runs.
    """

    def add_arguments(self, parser):
        """Register the parent command's options plus the skip flag."""
        super(Command, self).add_arguments(parser)
        parser.add_argument(
            '--skip-external-links-collection', action='store_true', default=False,
            dest='skip_external_links_collection',
            help='Do not collect external links from cells before updating the index.')

    def handle(self, **options):
        """Collect external links (unless skipped), then run the index update.

        Returns ``None`` without indexing anything when no search cell is
        found for the ``_text`` search service; otherwise returns whatever
        the parent command's ``handle()`` returns.
        """
        if not any(SearchCell.get_cells_by_search_service('_text')):
            # do not index site if there's no matching search cell
            return
        if not options.get('skip_external_links_collection', False):
            self.collect_external_links(options)
        return super(Command, self).handle(**options)

    def collect_external_links(self, options):
        """Synchronise ``ExternalLinkSearchItem`` objects with the cells.

        Walks the pages with an empty ``sub_slug`` and their cells, keeping
        only those for which ``is_visible(user=None)`` is true, and gathers
        the data returned by ``cell.get_external_links_data()`` per URL.
        One ``ExternalLinkSearchItem`` is then created or updated for each
        URL, and items not touched by this run are deleted.

        ``options`` is the command options dictionary; when its ``remove``
        entry is true all existing items are deleted first.
        """
        start_time = now()

        if options.get('remove'):
            ExternalLinkSearchItem.objects.all().delete()

        # assemble external links data, keyed by URL
        links = {}
        for page in Page.objects.filter(sub_slug=''):
            if not page.is_visible(user=None):
                continue
            for cell in page.get_cells():
                if not cell.is_visible(user=None):
                    continue
                for link_data in cell.get_external_links_data():
                    url = link_data['url']
                    entry = links.get(url)
                    if entry is None:
                        # first occurrence of that link: create an entry, its
                        # title will be the title of the search item.
                        entry = {'title': link_data['title'], 'all_texts': []}
                        links[url] = entry
                    else:
                        # if that link already exists, just keep the title as
                        # text.
                        entry['all_texts'].append(link_data['title'])
                    # additional texts will be assembled and indexed
                    entry['all_texts'].append(link_data.get('text') or '')

        # save data as ExternalLinkSearchItem objects
        for link_url, link_data in links.items():
            # the "created" flag is not needed: the object is saved in both
            # cases, so that its text (and title, for existing objects) get
            # updated.
            link_object, _created = ExternalLinkSearchItem.objects.get_or_create(
                url=link_url,
                defaults={'title': link_data['title']})
            link_object.title = link_data['title']
            link_object.text = '\n'.join(link_data['all_texts'])
            link_object.save()

        # remove obsolete objects, i.e. those that were not saved during this
        # run (presumably last_update_timestamp is refreshed on every save;
        # confirm against the ExternalLinkSearchItem model).
        ExternalLinkSearchItem.objects.filter(last_update_timestamp__lt=start_time).delete()