# combo - content management system
# Copyright (C) 2014-2017  Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from django import template
from django.contrib.auth.models import Group
from django.contrib.contenttypes import fields
from django.contrib.contenttypes.models import ContentType
from django.core.exceptions import PermissionDenied
from django.db import models
from django.db.models import JSONField
from django.http import HttpResponse, HttpResponseBadRequest
from django.shortcuts import get_object_or_404
from django.template import RequestContext, Template
from django.utils.functional import cached_property
from django.utils.http import quote
from django.utils.translation import gettext_lazy as _

from combo.data.library import register_cell_class
from combo.data.models import CellBase, Page
from combo.utils import get_templated_url, requests

from . import engines

# Prefix of the service slugs that restrict the internal text search to one
# root page and its sub pages; the root page slug follows the prefix.
_TEXT_PAGE_PREFIX = '_text_page_'


def get_root_page_and_children(service_slug):
    """Return the pages targeted by a ``_text_page_<page slug>`` service slug.

    Returns the result of ``get_descendants_and_me()`` for the page whose
    ``slug`` is the part following the prefix and whose ``sub_slug`` is empty.
    Returns ``None`` when the slug does not carry the prefix, or when the
    page lookup finds no page or several pages.
    """
    if not service_slug.startswith(_TEXT_PAGE_PREFIX):
        return None
    # strip the leading prefix only; the page slug itself may legitimately
    # contain the same character sequence.
    page_slug = service_slug[len(_TEXT_PAGE_PREFIX) :]
    try:
        root_page = Page.objects.get(slug=page_slug, sub_slug='')
    except (Page.DoesNotExist, Page.MultipleObjectsReturned):
        return None
    return root_page.get_descendants_and_me()


@register_cell_class
class SearchCell(CellBase):
    """Cell displaying a search form backed by one or several search services.

    The selected services are stored in the ``_search_services`` JSON field:
    the ``data`` key holds the list of service slugs and the ``options`` key
    maps a service slug to its per-service options.
    """

    default_template_name = 'combo/search-cell.html'
    manager_form_template = 'combo/manager/search-cell-form.html'
    exclude_from_search = True

    _search_services = JSONField(_('Search Services'), default=dict, blank=True)
    title = models.CharField(_('Title'), max_length=150, blank=True)
    autofocus = models.BooleanField(_('Autofocus'), default=False)
    input_placeholder = models.CharField(_('Placeholder'), max_length=64, default="", blank=True)

    class Meta:
        verbose_name = _('Search')

    def is_visible(self, request, **kwargs):
        """Hide the cell when no search service is selected, else defer to the parent class."""
        if not self._search_services.get('data'):
            return False
        return super().is_visible(request, **kwargs)

    def get_default_form_class(self):
        """Return the form class used to edit this cell."""
        # imported here rather than at module level (presumably to avoid a
        # circular import between the models and forms modules).
        from .forms import SearchCellForm

        return SearchCellForm

    @property
    def varname(self):
        """Return the cell slug with hyphens replaced by underscores, or '' if there is no slug."""
        if self.slug:
            # no hyphen in varname, could be used in context and templates
            return self.slug.replace('-', '_')
        return ''

    @cached_property
    def search_services(self):
        """Return the list of usable engine definitions for the selected slugs.

        Each returned engine definition is completed with its ``slug`` and
        ``options`` (the options stored for that slug, or ``None``). Slugs
        for which no engine with a ``url`` or a ``function`` is found are
        skipped.
        """
        services = []
        # tolerate an 'options' key stored as None, like get_serialized_cell does
        all_options = self._search_services.get('options') or {}
        for service_slug in self._search_services.get('data') or []:
            service = engines.get(service_slug)
            if service_slug.startswith(_TEXT_PAGE_PREFIX):
                # page-restricted text searches all use the generic text engine
                service = engines.get('_text')
            if service_slug.startswith('cards:'):
                # the third part of the slug may carry a "__without-user__"
                # marker and a fourth part may select a custom view.
                parts = service_slug.split(':')
                without_user = False
                if parts[2].endswith('__without-user__'):
                    without_user = True
                    parts[2] = parts[2].replace('__without-user__', '')
                if not service:
                    # retry with cleaned parts
                    service = engines.get(':'.join(parts[:3]))
                if not service:
                    # still not found
                    continue
                if len(parts) > 3:
                    custom_views_by_id = {c['id']: c['text'] for c in service.get('custom_views') or []}
                    service['label'] = '%s - %s' % (service['label'], custom_views_by_id.get(parts[3]))
                    service['selected_custom_view'] = parts[3]
                if without_user:
                    service['without_user'] = True
                    service['label'] = '%s (%s)' % (service['label'], _('logged-in user ignored'))
            if service and (service.get('url') or service.get('function')):
                service['slug'] = service_slug
                service['options'] = all_options.get(service_slug)
                services.append(service)
        return services

    @cached_property
    def available_engines(self):
        """Return the engines, keyed by slug, that are not already selected.

        The ``_text`` engine and ``cards:`` engines always stay available,
        even when they are already selected.
        """
        all_engines = engines.get_engines()
        # always remove _text engine: we can add search on page and sub pages
        current_engines = [
            e['slug']
            for e in self.search_services
            if e['slug'] != '_text' and not e['slug'].startswith('cards:')
        ]
        return {k: v for k, v in all_engines.items() if k not in current_engines}

    def get_search_services_for_display(self):
        """Return ``(slug, label, options)`` tuples for the selected services.

        Page-restricted text searches get a label built from the title of
        their root page when that page is found.
        """
        # get pages for _text engines
        page_slugs = [
            # strip the leading prefix only, the page slug may contain it too
            e['slug'][len(_TEXT_PAGE_PREFIX) :]
            for e in self.search_services
            if e['slug'].startswith(_TEXT_PAGE_PREFIX)
        ]
        pages = Page.objects.filter(snapshot__isnull=True, sub_slug='', slug__in=page_slugs).values(
            'slug', 'title'
        )
        pages_by_slug = {'_text_page_%s' % p['slug']: p['title'] for p in pages}
        services = []
        for service in self.search_services:
            label = service['label']
            if service['slug'] in pages_by_slug:
                label = _('Page "%(page)s" and sub pages Contents') % {'page': pages_by_slug[service['slug']]}
            services.append((service['slug'], label, service['options']))
        return services

    @property
    def has_multiple_search_services(self):
        """Tell whether more than one service slug is stored on the cell."""
        return len(self._search_services.get('data') or []) > 1

    @classmethod
    def get_cells_by_search_service(cls, search_service):
        """Yield every cell whose stored service slugs include ``search_service``."""
        for cell in cls.objects.all():
            if search_service in (cell._search_services.get('data') or []):
                yield cell

    def get_serialized_cell(self):
        """Serialize the cell, replacing ``target_page`` ids by a portable key.

        Each ``target_page`` option holding a page primary key is replaced by
        the first element of that page's natural key, or by '' when the page
        does not exist.
        """
        serialized_cell = super().get_serialized_cell()
        for options in (serialized_cell['fields']['_search_services'].get('options') or {}).values():
            if options.get('target_page') not in ['', None]:
                try:
                    target_page = Page.objects.get(pk=options['target_page'])
                    options['target_page'] = target_page.natural_key()[0]
                except Page.DoesNotExist:
                    options['target_page'] = ''
        return serialized_cell

    @classmethod
    def prepare_serialized_data(cls, cell_data):
        """Map serialized ``target_page`` values back to local page ids.

        Counterpart of :meth:`get_serialized_cell`: each non-empty
        ``target_page`` option is replaced by the value found for it in
        ``Page.get_page_ids_by_uuids()`` (``None`` when there is no match).
        ``cell_data`` is modified in place and returned.
        """
        if not cell_data['fields'].get('_search_services'):
            return cell_data
        if not cell_data['fields']['_search_services'].get('options'):
            return cell_data
        for options in cell_data['fields']['_search_services']['options'].values():
            if options.get('target_page'):
                options['target_page'] = Page.get_page_ids_by_uuids().get(options['target_page'])
        return cell_data

    def modify_global_context(self, context, request):
        """Expose the ``<varname>`` query string parameter in the global context."""
        # if self.varname is in the query string (of the page),
        # add it to the global context; so it can be used by others cells
        # for example by a JsonCell with ...[self.varname]... in its URL
        if self.varname and self.varname in request.GET:
            context[self.varname] = request.GET.get(self.varname)

    def get_cell_extra_context(self, context):
        """Add ``initial_q`` and ``initial_query_string`` to the cell context.

        Both are ``None`` when the context holds no request.
        """
        extra_context = super().get_cell_extra_context(context)
        # if there is a q_<varname> in query_string, send it to the template (to be
        # used as an initial query) and remove it from query_string
        initial_q = None
        initial_query_string = None
        if context.get('request'):
            # work on a copy so the request's own GET is left untouched
            request_get = context['request'].GET.copy()
            if self.varname:
                q_varname = 'q_%s' % self.varname
                if q_varname in request_get:
                    initial_q = request_get[q_varname]
                    del request_get[q_varname]
            initial_query_string = request_get.urlencode()
        extra_context.update({'initial_q': initial_q, 'initial_query_string': initial_query_string})
        return extra_context

    @classmethod
    def ajax_results_view(cls, request, cell_pk, service_slug):
        """Render the results of one search service of a cell as an HTML fragment.

        The query is read from the ``q`` GET parameter. Engines with a
        ``function`` are called directly; other engines are queried over
        HTTP at their templated ``url`` and are expected to answer with JSON.

        Returns an ``HttpResponseBadRequest`` when ``q`` is missing or
        contains a nul byte. An empty result list is rendered when the
        service is not selected on the cell, the query is empty, the URL is
        empty or the remote answer is not valid JSON.

        Raises ``PermissionDenied`` when the cell or its page is not visible
        for the request; ``get_object_or_404`` raises ``Http404`` when the
        cell does not exist.
        """
        cell = get_object_or_404(cls, pk=cell_pk)
        if not cell.is_visible(request) or not cell.page.is_visible(request.user):
            raise PermissionDenied

        if 'q' not in request.GET:
            return HttpResponseBadRequest('missing query parameter')

        query = request.GET.get('q')
        if '\x00' in query:  # nul byte
            return HttpResponseBadRequest('invalid query string')

        def render_response(service=None, results=None, pages=None):
            """Render the results template; missing arguments give an empty result list."""
            service = service or {}
            results = results or {'err': 0, 'data': []}
            template_names = ['combo/search-cell-results.html']
            if cell.slug:
                # a cell specific template takes precedence when it exists
                template_names.insert(0, 'combo/cells/%s/search-cell-results.html' % cell.slug)
            tmpl = template.loader.select_template(template_names)
            service_label = service.get('label')
            if pages:
                service_label = _('Page "%(page)s" and sub pages Contents') % {'page': pages[0].title}
            custom_title = None
            try:
                # optional label defined with engine
                if service['options']['title']:
                    custom_title = service['options']['title']
            except (KeyError, TypeError):
                # no options at all (None) or no title option
                pass
            context = {
                'cell': cell,
                'results': results,
                'search_service': service,
                'search_service_label': service_label,
                'search_service_title': custom_title,
                'query': query,
            }
            return HttpResponse(tmpl.render(context, request), content_type='text/html')

        for service in cell.search_services:
            if service.get('slug') == service_slug:
                break
        else:
            # requested service is not selected on this cell
            return render_response()

        if not query:
            return render_response(service)

        pages = None

        if service.get('function'):  # internal search engine
            pages = get_root_page_and_children(service_slug)
            try:
                with_description = service['options']['with_description']
            except (KeyError, TypeError):
                with_description = None
            results = {
                'data': service['function'](request, query, pages=pages, with_description=with_description)
            }
        else:
            url = get_templated_url(
                service['url'], context={'request': request, 'q': query, 'search_service': service}
            )
            if '%(q)s' in url:
                # escape percent signs by doubling them, then restore %(q)s
                url = url.replace('%', '%%').replace('%%(q)s', '%(q)s')
                # only interpolate when the placeholder is present, so that
                # stray percent signs in other URLs cannot break formatting
                url = url % {'q': quote(query.encode('utf-8'))}
            if url.startswith('/'):
                url = request.build_absolute_uri(url)

            if not url:
                return render_response(service)

            kwargs = {}
            kwargs['cache_duration'] = service.get('cache_duration', 0)
            kwargs['remote_service'] = 'auto' if service.get('signature') else None
            # don't automatically add user info to query string, if required it can
            # be set explicitly in the URL template in the engine definition (via
            # {{user_nameid}} or {{user_email}}).
            kwargs['without_user'] = True
            # don't send error traces on HTTP errors
            kwargs['log_errors'] = 'warn'
            response = requests.get(url, **kwargs)
            try:
                results = response.json()
            except ValueError:
                return render_response(service)

            if service.get('data_key'):
                # hits are stored under a custom key, expose them as 'data'
                results['data'] = results.get(service['data_key']) or []

            # templates used to (re)compute attributes of each hit; options
            # set on the cell take precedence over the engine definition.
            hit_templates = {}
            options = service.get('options') or {}
            if options.get('target_page'):
                try:
                    page = Page.objects.get(pk=options['target_page'])
                    page_base_url = page.get_online_url()
                    hit_templates['url'] = Template('%s{{ id }}/' % page_base_url)
                except Page.DoesNotExist:
                    pass
            if not hit_templates.get('url') and service.get('hit_url_template'):
                hit_templates['url'] = Template(service['hit_url_template'])
            if service.get('hit_label_template'):
                hit_templates['text'] = Template(service['hit_label_template'])
            if options.get('description_template'):
                hit_templates['description'] = Template(service['options']['description_template'])
            if not hit_templates.get('description') and service.get('hit_description_template'):
                hit_templates['description'] = Template(service['hit_description_template'])

            if hit_templates:
                for hit in results.get('data') or []:
                    for k, v in hit_templates.items():
                        hit[k] = v.render(RequestContext(request, hit))

        return render_response(service, results, pages=pages)

    def has_text_search_service(self):
        """Tell whether one of the stored service slugs starts with ``_text``."""
        # "or []" also covers a 'data' key explicitly stored as None
        return any(key.startswith('_text') for key in self._search_services.get('data') or [])

    def missing_index(self):
        """Tell whether the ``IndexedCell`` table is empty."""
        # exists() avoids counting every row just to test for emptiness
        return not IndexedCell.objects.exists()


class IndexedCell(models.Model):
    """Indexed text stored for a cell, with the related page, URL, title and access fields."""

    cell_type = models.ForeignKey(ContentType, on_delete=models.CASCADE)
    cell_pk = models.PositiveIntegerField(null=True)
    cell = fields.GenericForeignKey('cell_type', 'cell_pk')
    page = models.ForeignKey(Page, on_delete=models.CASCADE, blank=True, null=True)
    url = models.CharField(max_length=5000, blank=True, null=True)
    title = models.CharField(max_length=500, blank=True, null=True)
    indexed_text = models.TextField(blank=True, null=True)
    public_access = models.BooleanField(default=False)
    restricted_groups = models.ManyToManyField(Group, blank=True, related_name='+')
    excluded_groups = models.ManyToManyField(Group, blank=True, related_name='+')
    last_update_timestamp = models.DateTimeField(auto_now=True)