Basic tools to interact with solr
This commit is contained in:
commit
eced00aaf2
|
@ -0,0 +1,42 @@
|
|||
#! /usr/bin/env python
|
||||
|
||||
import sys
|
||||
import os
|
||||
import subprocess
|
||||
from pysolr import Solr
|
||||
|
||||
|
||||
# Cached directory listing. It is only generated when missing, so file
# indexes stay stable between runs and an import can be resumed by index.
LISTING_CACHE = '/tmp/.importged-filenames'
SOLR_URL = 'http://127.0.0.1:8080/solr/'
# Listing index at which a directory import starts when no explicit start
# index is given on the command line. Not applied to single-file imports.
RESUME_FROM = 877375
# Number of added documents after which a Solr commit is issued.
COMMIT_EVERY = 100


def _pdf_to_text(filename):
    """Return the text pdftotext extracts from *filename*, or None on error.

    Failures are reported on stdout and signalled with None so the caller
    can skip the file and carry on with the next one.
    """
    try:
        proc = subprocess.Popen(['pdftotext', filename, '-'],
                                close_fds=True,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    except OSError:
        print('E: error running pdftotext')
        return None
    stdout, _ = proc.communicate()
    if proc.returncode != 0:
        print('E: error running pdftotext (rc:%d)' % proc.returncode)
        return None
    # Replace undecodable bytes instead of raising, so that one odd file
    # cannot abort a long-running import.
    return stdout.decode('utf-8', 'replace')


def _import_files(conn, filenames, start=0):
    """Add the text of every ``.pdf`` entry of *filenames* to *conn*.

    *filenames* is any iterable of names (surrounding whitespace is
    stripped); entries whose index is below *start* are skipped. A commit is
    issued every COMMIT_EVERY added documents and once more at the end, so
    the final partial batch is committed as well.
    """
    pending = 0
    for i, filename in enumerate(filenames):
        if i < start:
            continue
        filename = filename.strip()
        if not filename.endswith('.pdf'):
            continue
        print('[%d] importing filename %s' % (i, filename))
        text = _pdf_to_text(filename)
        if text is None:
            continue
        conn.add([{'id': filename, 'text': text}])
        pending += 1
        if pending >= COMMIT_EVERY:
            conn.commit()
            pending = 0
    if pending:
        conn.commit()


def main(argv=None):
    """Import a single PDF, or every PDF of a directory, into Solr.

    ``argv[1]`` is a file or a directory. The optional ``argv[2]`` is the
    index of the first entry to import; it defaults to RESUME_FROM for a
    directory and to 0 for a single file.

    Returns the process exit status: 0 on success, 2 on a usage error.
    """
    if argv is None:
        argv = sys.argv
    prog = argv[0] if argv else 'import'
    if len(argv) < 2:
        sys.stderr.write('usage: %s <directory|file.pdf> [start-index]\n'
                         % prog)
        return 2

    path = argv[1]
    is_dir = os.path.isdir(path)
    start = RESUME_FROM if is_dir else 0
    if len(argv) > 2:
        try:
            start = int(argv[2])
        except ValueError:
            sys.stderr.write('E: start-index must be an integer: %s\n'
                             % argv[2])
            return 2

    conn = Solr(SOLR_URL)
    if not is_dir:
        _import_files(conn, [path], start)
        return 0

    # Work from inside the directory: the listing holds bare file names.
    os.chdir(path)
    if not os.path.exists(LISTING_CACHE):
        os.system('ls -1 > ' + LISTING_CACHE)
    with open(LISTING_CACHE) as listing:
        _import_files(conn, listing, start)
    return 0


if __name__ == '__main__':
    sys.exit(main())
|
|
@ -0,0 +1,18 @@
|
|||
#! /usr/bin/env python
|
||||
|
||||
import sys
|
||||
import os
|
||||
import subprocess
|
||||
from pysolr import Solr
|
||||
|
||||
SOLR_URL = 'http://127.0.0.1:8080/solr/'
# Maximum number of results requested from Solr.
MAX_ROWS = 20


def main(argv=None):
    """Run the query given in ``argv[1]`` against Solr and print the results.

    Prints the number of hits, the query time, and one ``score - id`` line
    per returned result.

    Returns the process exit status: 0 on success, 2 when no query is given.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write('usage: %s <query>\n'
                         % (argv[0] if argv else 'search'))
        return 2

    query = argv[1]
    # Command-line arguments are byte strings on Python 2 and already text
    # on Python 3; only the former need decoding.
    if isinstance(query, bytes):
        query = query.decode('utf-8')

    conn = Solr(SOLR_URL)
    results = conn.search(query, rows=MAX_ROWS, fl='* score')

    print('number of results: %s' % results.hits)
    print('qtime: %s' % results.qtime)

    for result in results:
        print('%.2f - %s' % (result['score'], result['id']))
    return 0


if __name__ == '__main__':
    sys.exit(main())
|
||||
|
Reference in New Issue