search server

This commit is contained in:
Frédéric Péters 2012-09-20 14:30:40 +02:00
parent eced00aaf2
commit 3830c8392b
9 changed files with 593 additions and 0 deletions

BIN
server/a1.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

BIN
server/a22.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 43 B

BIN
server/a8.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 295 B

122
server/index.html Normal file
View File

@ -0,0 +1,122 @@
<!DOCTYPE html>
<html lang="fr">
<head>
<meta charset="utf-8"/>
<link rel="stylesheet" type="text/css" media="all" href="style.css"/>
<script src="js/jquery-1.7.2.min.js"></script>
<title>Recherche</title>
<script type="text/javascript">
/* Number of results requested per page. */
var NUM_RESULTS = 25;

/*
 * Render one page of search results.
 *
 * data is the JSON object returned by the search server; the fields read
 * here are 'results' (objects carrying 'id' and 'score'), 'hits' and 'qtime'.
 * The current page is taken from the hidden #offset field of the form.
 */
function display_results(data) {
  var offset = $('#offset').val() * 1;
  var hits = data['hits'] * 1;
  var results = $('#results');

  /* Query string the form would produce with #offset set to new_offset;
   * the field is put back to the current offset afterwards. */
  function query_for(new_offset) {
    $('#offset').val(new_offset);
    var query = '?' + $('form').serialize();
    $('#offset').val(offset);
    return query;
  }

  /* Navigation link that remembers, as jQuery data, the offset it leads to. */
  function nav_link(css_class, label, new_offset) {
    return $('<a/>', {
      'class': 'nav ' + css_class,
      href: query_for(new_offset),
      text: label
    }).data('offset', new_offset);
  }

  $('#debug').text(data['qtime']);

  /* Build the list from DOM nodes rather than concatenated HTML so that
   * markup characters in a document id are displayed, not interpreted. */
  var list = $('<ul/>');
  $.each(data['results'], function(key, val) {
    /* Math.floor, not parseInt: parseInt stringifies its argument first and
     * would read a tiny score such as 5e-7 as 5. */
    var score = Math.min(5, Math.floor(val['score'] * 5));
    $('<li/>')
      .append($('<span/>', {'class': 'score', text: String(score)}))
      .append(document.createTextNode(' '))
      .append($('<a/>', {href: 'pdfs/' + val['id'], text: val['id']}))
      .appendTo(list);
  });
  results.empty().append(list);

  if (offset > 0) {
    results.append(nav_link('prev', 'Résultats précédents',
                            Math.max(0, offset - NUM_RESULTS)));
  } else {
    /* First page: show the label without making it a link. */
    results.append($('<span/>', {'class': 'nav prev',
                                 text: 'Résultats précédents'}));
  }
  if (offset + $('#count').val() * 1 < hits) {
    results.append(nav_link('next', 'Résultats suivants',
                            offset + NUM_RESULTS));
  }

  var page = (offset / NUM_RESULTS) + 1;
  var pages = Math.max(1, Math.floor(((hits - 1) / NUM_RESULTS) + 1));
  $('#info').text('Nombre de résultats: ' + data['hits'] +
                  ' - page ' + page + ' de ' + pages);

  /* Hovering a result shows the server-rendered preview of its document. */
  $('#results ul a').hover(
    function() {
      /* Blank the image first so the previous preview is not shown while
       * the new one loads. */
      $('#preview').attr('src', 'about:blank');
      $('#preview').attr('src', 'search/' + $(this).attr('href'));
      $('#preview').show();
    },
    function() {
      $('#preview').hide();
    }
  );

  /* Previous/next links fetch the other page through XHR. */
  $('#results a.nav').click(function() {
    var this_link = $(this);
    $.getJSON('search/' + this_link.attr('href'), function(data) {
      $('#offset').val(this_link.data('offset'));
      display_results(data);
    });
    return false;
  });
}
/* Once the DOM is ready, run searches through XHR instead of a page load. */
$(document).ready(function() {
  $('form').submit(function() {
    var form = $(this);

    /* Clear whatever the previous search left on screen. */
    $('#results').html('');
    $('#info').html('');
    $('#preview').hide();

    /* A new search always starts on the first page. */
    $('#count').val(NUM_RESULTS);
    $('#offset').val(0);

    $.getJSON('search/?' + form.serialize(), function(data) {
      display_results(data);
    });

    /* Keep the browser from submitting the form itself. */
    return false;
  });
});
</script>
</head>
<body>
<div id="wrap">
<div id="header">
<h1>PFWB</h1>
<span>Prototype</span>
</div>
<div id="splash">
</div>
<div id="content">
<div id="appbar">
<h2>Prototype de moteur de recherche - GED PFWB</h2>
</div>
<form>
<input name="count" id="count" type="hidden"/>
<input name="offset" id="offset" type="hidden"/>
<input name="q"/>
<input type="submit" value="Rechercher"/>
</form>
<div id="info">
</div>
<div id="results">
</div>
<img id="preview"/>
<div id="debug">
</div>
<div id="footer">
Entr'ouvert — 2012
</div>
</div>
</div>
</body>
</html>

4
server/js/jquery-1.7.2.min.js vendored Normal file

File diff suppressed because one or more lines are too long

125
server/js/jquery-ui-1.8.21.custom.min.js vendored Normal file

File diff suppressed because one or more lines are too long

172
server/search_server.py Executable file
View File

@ -0,0 +1,172 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
#
# Search Server - SCGI server interfacing with Solr
# Copyright (C) 2007-2012 Parlement de la Communauté française de Belgique
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
import sys
import os
import resource
from optparse import OptionParser
from scgi.scgi_server import SCGIServer, SCGIHandler
import time
import syslog
import socket
from pysolr import Solr
import cgi
import json
import cairo
import poppler
import hashlib
class SearchHandler(SCGIHandler):
    """SCGI handler answering search queries and PDF preview requests.

    Only body-less (GET) requests are accepted.  The part of REQUEST_URI
    following ``/search/`` selects what is served:

    * ``pdfs/<name>[.png]`` -- a PNG rendering of the first page of the PDF;
    * anything else without a query string -- the ``search.html`` page;
    * anything else with a query string -- Solr results for ``q`` as JSON,
      paged with the optional ``count`` and ``offset`` parameters.
    """

    # Both flags are set on the class by main().
    debug = False
    daemon = False
    # pysolr client, created when the first connection is handled.
    solr = None

    def handle_connection(self, conn):
        """Serve the single request arriving on *conn*, then close it."""
        if not self.solr:
            self.solr = Solr('http://127.0.0.1:8080/solr/')
        reader = conn.makefile('r')
        writer = conn.makefile('w')
        try:
            # Reading the environment happens inside the try block so that a
            # malformed request cannot leave the connection open.
            env = self.read_env(reader)
            bodysize = int(env.get('CONTENT_LENGTH', 0))
            self.produce(env, bodysize, reader, writer)
        finally:
            writer.close()
            reader.close()
            conn.close()

    def produce(self, env, bodysize, input, output):
        """Dispatch one request and write the complete response to *output*.

        *env* is the SCGI environment mapping and *bodysize* the declared
        CONTENT_LENGTH.  *input* is never read: requests with a body are
        refused.
        """
        self._trace('Request received at %s' % time.strftime('[%Y-%m-%d %H:%M]'))
        self._trace(' - body size: %s' % bodysize)
        if bodysize != 0:
            return self.error_page(output, 'this server only supports GET')
        uri = env.get('REQUEST_URI', '')[len('/search/'):]
        self._trace(' - uri: %s' % uri)
        if uri.startswith('pdfs/'):
            return self._send_preview(uri, output)
        query_string = env.get('QUERY_STRING')
        if not query_string:
            return self._send_homepage(output)
        return self._send_results(query_string, output)

    def error_page(self, output, message):
        """Send *message* to the client as a plain-text response."""
        output.write('Content-type: text/plain\n\n')
        output.write('%s\n' % message)

    def _trace(self, message):
        """Write *message* to stdout when debugging is enabled."""
        if self.debug:
            sys.stdout.write(message + '\n')

    def _send_preview(self, uri, output):
        """Send a PNG preview of the first page of the PDF named by *uri*.

        Previews are cached in /tmp under a name derived from the SHA-1 of
        the requested URI and are only rendered when that file is missing.
        """
        # The cache key is the URI exactly as requested, i.e. before the
        # optional '.png' suffix is dropped, so existing cache files stay
        # valid.
        preview_filename = '/tmp/preview-%s.png' % hashlib.sha1(uri).hexdigest()
        if uri.endswith('.png'):
            uri = uri[:-4]
        # The URI comes straight from the client and is appended to the
        # working directory below.  A '..' path segment needs two dots, each
        # either literal or percent-encoded, so refusing both spellings
        # keeps the request inside pdfs/.
        if '..' in uri or '%2e' in uri.lower():
            return self.error_page(output, 'invalid document path')
        if not os.path.exists(preview_filename):
            self._render_first_page(
                'file://' + os.getcwd() + '/' + uri, preview_filename)
        output.write('Content-type: image/png\n\n')
        self._trace(' - preview: %s' % preview_filename)
        with open(preview_filename, 'rb') as preview:
            output.write(preview.read())

    def _render_first_page(self, document_uri, png_filename):
        """Render page one of the PDF at *document_uri* into *png_filename*."""
        document = poppler.document_new_from_file(document_uri, None)
        page = document.get_page(0)
        scale = 0.8
        width, height = page.get_size()
        surface = cairo.ImageSurface(
            cairo.FORMAT_ARGB32, int(width * scale), int(height * scale))
        cr = cairo.Context(surface)
        # Fill the whole page area with white, then draw the page on top.
        cr.set_source_rgb(1, 1, 1)
        if scale != 1:
            cr.scale(scale, scale)
        cr.rectangle(0, 0, width, height)
        cr.fill()
        page.render(cr)
        surface.write_to_png(png_filename)

    def _send_homepage(self, output):
        """Send the static search page."""
        output.write('Content-type: text/html\n\n')
        # NOTE(review): the page committed next to this script is named
        # index.html -- confirm search.html exists in the working directory.
        with open('search.html') as page:
            output.write(page.read())

    def _send_results(self, query_string, output):
        """Run the Solr query described by *query_string*; answer in JSON.

        Malformed parameters are answered with a plain-text error page
        rather than an unhandled exception.
        """
        qs = cgi.parse_qs(query_string)
        try:
            query = unicode(qs['q'][0], 'utf-8')
        except KeyError:
            return self.error_page(output, 'missing q parameter')
        except UnicodeDecodeError:
            return self.error_page(output, 'q parameter is not valid UTF-8')
        try:
            count = int(qs['count'][0]) if 'count' in qs else 20
            offset = int(qs['offset'][0]) if 'offset' in qs else 0
        except ValueError:
            return self.error_page(output, 'count and offset must be integers')
        results = self.solr.search(query, rows=count, start=offset, fl='* score')
        response = {
            'hits': results.hits,
            'qtime': results.qtime,
            'results': [{'score': x['score'], 'id': x['id']} for x in results],
        }
        output.write('Content-type: application/json\n\n')
        json.dump(response, output)
def main():
    """Parse the command line, detach unless asked not to, and serve SCGI.

    Options: -p/--port (default 2152), --debug, -f/--foreground and
    --pid FILE.  Exits with status 1 when the server raises socket.error.
    """
    parser = OptionParser()
    parser.add_option('-p', '--port', dest='port', type='int', default=2152)
    parser.add_option('--debug', action='store_true', dest='debug')
    parser.add_option('-f', '--foreground', dest='foreground', action='store_true')
    parser.add_option('--pid', dest='pid')
    options, _ = parser.parse_args()

    if not options.foreground:
        SearchHandler.daemon = True
        # First fork: the parent exits, the child starts a new session.
        if os.fork():
            os._exit(0)
        os.setsid()
        # Close every descriptor up to the hard limit, falling back to 1024
        # when the limit is unbounded; closerange ignores descriptors that
        # are not open.
        maxfd = resource.getrlimit(resource.RLIMIT_NOFILE)[1]
        if maxfd == resource.RLIM_INFINITY:
            maxfd = 1024
        os.closerange(0, maxfd)
        # With everything closed, /dev/null is opened as descriptor 0 and
        # then duplicated onto stdout and stderr.
        os.open('/dev/null', os.O_RDWR)
        os.dup2(0, 1)
        os.dup2(0, 2)
        # Second fork: only the grandchild keeps running.
        if os.fork():
            os._exit(0)

    if options.pid:
        with open(options.pid, 'w') as pid_file:
            pid_file.write(str(os.getpid()))

    syslog.openlog('tabellio-search')
    SearchHandler.debug = options.debug
    try:
        SCGIServer(handler_class=SearchHandler, port=options.port).serve()
    except socket.error:
        if SearchHandler.daemon:
            syslog.syslog(syslog.LOG_CRIT, 'socket error (another instance is running?)')
        sys.stderr.write('E: socket error (another instance is running?)\n')
        sys.exit(1)


if __name__ == '__main__':
    main()

BIN
server/sky-birds.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 81 KiB

170
server/style.css Normal file
View File

@ -0,0 +1,170 @@
/* theme derived and inspired by TerraFirma
* <http://www.oswd.org/design/information/id/3557/>
*/
html, body {
  margin: 0;
  font-family: sans-serif;
}

html {
  background: #F9F9F7 url(a1.gif) repeat-x;
  color: #8c8c73;
}

a {
  color: #413C3C;
  text-decoration: underline;
}

a:hover {
  text-decoration: none;
}

/* Page container: white rounded card with a drop shadow.  The standard
 * properties follow the vendor-prefixed ones so that they take precedence
 * wherever both are understood. */
div#wrap {
  background: white;
  width: 95%;
  margin: 2em auto 0;
  padding: 15px;
  -moz-border-radius: 6px;
  -webkit-border-radius: 6px;
  border-radius: 6px;
  -moz-box-shadow: 0 0 4px rgba(0,0,0,0.75);
  -webkit-box-shadow: 0 0 4px rgba(0,0,0,0.75);
  box-shadow: 0 0 4px rgba(0,0,0,0.75);
  position: relative;
}

/* Orange title box, stacked above the splash banner (z-index 14 vs 10). */
#header {
  position: absolute;
  background: #FF7800 url(a8.gif) repeat-x;
  -moz-border-radius: 6px 0 0 6px;
  -webkit-border-radius: 6px 0 0 6px;
  border-radius: 6px 0 0 6px;
  width: 212px;
  height: 92px;
  color: #fff;
  padding-left: 10px;
  z-index: 14;
}

#header h1 {
  font-size: 23px;
  letter-spacing: -1px;
  padding-top: 30px;
  margin: 0;
}

#header span {
  margin: 0;
  font-size: 10px;
  font-weight: normal;
  color: #FCE2CA;
}

#header span a {
  color: white;
}

/* Banner image positioned against the right edge of the container. */
#splash {
  position: absolute;
  right: 15px;
  background: #5D91E5 url(sky-birds.jpg) no-repeat top right;
  width: 90%;
  height: 87px;
  -moz-border-radius: 0 6px 6px 0;
  -webkit-border-radius: 0 6px 6px 0;
  border-radius: 0 6px 6px 0;
  z-index: 10;
  color: white;
  font-weight: bold;
  text-shadow: 1px 1px 1px black;
  text-align: right;
  padding-right: 10px;
  padding-top: 5px;
}

/* The 100px top margin leaves room for the absolutely positioned header
 * and splash. */
div#content {
  margin: 100px 1ex 1em;
  padding: 1ex;
}

/* This rule used to be declared twice; the earlier copy referenced
 * ../images/a22.gif and was entirely overridden by the later one, whose
 * values are kept here. */
div#content h2 {
  margin-top: 0;
  font-weight: normal;
  color: #656551;
  font-size: 18px;
  letter-spacing: -1px;
  line-height: 25px;
  margin-bottom: 20px;
  padding: 0 0 10px 15px;
  position: relative;
  top: 4px;
  background: url(a22.gif) bottom repeat-x;
}

#footer {
  font-size: 70%;
  position: relative;
  clear: both;
  height: 66px;
  text-align: center;
  line-height: 66px;
  /* NOTE(review): the other images are referenced from the stylesheet's own
   * directory and no a50.gif is visible alongside them -- confirm that this
   * path resolves. */
  background-image: url(../images/a50.gif);
  color: #A8A88D;
}

#footer a {
  color: #8C8C73;
}

div#appbar a {
  position: relative;
  top: -45px;
  float: right;
  padding-left: 15px;
  padding-right: 15px;
}

div#debug {
  position: absolute;
  top: 0;
  right: 0;
}

#preview {
  position: absolute;
  top: 200px;
  left: 400px;
}

a.nav.next {
  margin-left: 10px;
}

/* Scores are written into the result list but not displayed. */
span.score {
  display: none;
}