<?xml version="1.0" encoding="UTF-8" ?>
|
|
<!--
|
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
|
contributor license agreements. See the NOTICE file distributed with
|
|
this work for additional information regarding copyright ownership.
|
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
|
(the "License"); you may not use this file except in compliance with
|
|
the License. You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
-->
|
|
|
|
<!--
|
|
For more details about configuration options that may appear in
|
|
this file, see http://wiki.apache.org/solr/SolrConfigXml.
|
|
-->
|
|
<config>
|
|
|
|
<!-- Lucene compatibility level requested for this configuration.
     NOTE(review): presumably this should track the Lucene/Solr release
     the index was built with - confirm before changing it. -->
<luceneMatchVersion>LUCENE_35</luceneMatchVersion>
|
|
|
|
<!-- Jar files pulled in through <lib> directives.
     Every *.jar found in the contrib directories below is matched. -->
<lib regex=".*\.jar" dir="../contrib/extraction/lib"/>
<lib regex=".*\.jar" dir="../contrib/analysis-extras/lib"/>
<lib regex=".*\.jar" dir="../contrib/analysis-extras/lucene-libs"/>

<!-- Versioned Solr jars selected from ../dist/ by file name pattern. -->
<lib regex="apache-solr-cell-\d.*\.jar" dir="../dist/"/>
<lib regex="apache-solr-core-\d.*\.jar" dir="../dist/"/>
<lib regex="apache-solr-analysis-extras-\d.*\.jar" dir="../dist/"/>
<lib regex="apache-solr-langid-\d.*\.jar" dir="../dist/"/>
|
|
|
|
|
|
<!-- Index data directory.
     The location can be overridden with the solr.data.dir system
     property. When the property is not set, the previously hard-coded
     path is used as the default, so existing deployments keep working
     unchanged. -->
<dataDir>${solr.data.dir:/Users/tom/VC/collective.solr/var/solr/data}</dataDir>
|
|
|
|
<!-- Directory factory used for the index; registered under the name
     "DirectoryFactory". -->
<directoryFactory class="solr.StandardDirectoryFactory" name="DirectoryFactory"/>
|
|
|
|
<!-- Index Defaults
|
|
|
|
Values here affect all index writers and act as a default
|
|
unless overridden.
|
|
|
|
WARNING: See also the <mainIndex> section below for parameters
|
|
that override these defaults for Solr's main Lucene index.
|
|
-->
|
|
<indexDefaults>
  <!-- Upper bound on the length of an indexed field.
       NOTE(review): presumably counted in tokens - confirm. -->
  <maxFieldLength>1000000000</maxFieldLength>

  <!-- How long to wait for the index write lock.
       NOTE(review): presumably milliseconds - confirm. -->
  <writeLockTimeout>1000</writeLockTimeout>

  <!-- Lock implementation used for the index. -->
  <lockType>native</lockType>
</indexDefaults>
|
|
|
|
<!-- Main Index
|
|
|
|
Values here override the values in the <indexDefaults> section
|
|
for the main on disk index.
|
|
-->
|
|
<mainIndex>
  <!-- Segment layout and buffering for the main on-disk index. -->
  <useCompoundFile>false</useCompoundFile>
  <mergeFactor>10</mergeFactor>
  <ramBufferSizeMB>16</ramBufferSizeMB>

  <!-- Lock and reader handling. -->
  <unlockOnStartup>true</unlockOnStartup>
  <reopenReaders>true</reopenReaders>

  <!-- Commit retention: keep one commit point and no optimized ones. -->
  <deletionPolicy class="solr.SolrDeletionPolicy">
    <str name="maxCommitsToKeep">1</str>
    <str name="maxOptimizedCommitsToKeep">0</str>
  </deletionPolicy>

  <!-- Lucene InfoStream

       To aid in advanced debugging, Lucene can emit an "InfoStream"
       of detailed information while indexing.

       Setting the value to true instructs the underlying Lucene
       IndexWriter to write its debugging info to the specified file.
    -->
  <infoStream file="INFOSTREAM.txt">false</infoStream>
</mainIndex>
|
|
|
|
<!-- JMX
|
|
|
|
This example enables JMX if and only if an existing MBeanServer
|
|
is found, use this if you want to configure JMX through JVM
|
|
parameters. Remove this to disable exposing Solr configuration
|
|
and statistics to JMX.
|
|
|
|
For more details see http://wiki.apache.org/solr/SolrJmx
|
|
-->
|
|
<jmx/>

<!-- Update handler; no additional options are configured for it. -->
<updateHandler class="solr.DirectUpdateHandler2"/>
|
|
|
|
<query>
  <maxBooleanClauses>1024</maxBooleanClauses>

  <!-- Filter Cache

       Used by SolrIndexSearcher to cache filters (DocSets): unordered
       sets of *all* documents that match a query. When a new searcher
       is opened, its caches may be prepopulated ("autowarmed") with
       data from the caches of the old searcher. autowarmCount is the
       number of items to prepopulate. For LRUCache, the autowarmed
       items are the most recently accessed ones.

       Parameters:
         class         - the SolrCache implementation
                         (LRUCache or FastLRUCache)
         size          - the maximum number of entries in the cache
         initialSize   - the initial capacity (number of entries) of
                         the cache (see java.util.HashMap)
         autowarmCount - the number of entries to prepopulate from
                         an old cache
    -->
  <filterCache class="solr.FastLRUCache" size="16384" initialSize="4096" autowarmCount="4096"/>

  <!-- Query Result Cache

       Holds the results of searches: ordered lists of document ids
       (DocList) keyed on a query, a sort, and the range of documents
       requested.
    -->
  <queryResultCache class="solr.FastLRUCache" size="128" initialSize="64" autowarmCount="32"/>

  <!-- Document Cache

       Holds Lucene Document objects (the stored fields of each
       document). Lucene internal document ids are transient, so this
       cache is not autowarmed.
    -->
  <documentCache class="solr.FastLRUCache" size="512" initialSize="512"/>

  <enableLazyFieldLoading>true</enableLazyFieldLoading>

  <!-- Result Window Size

       An optimization used together with the queryResultCache. When a
       search is requested, a superset of the requested document ids is
       collected. For example, if a query asks for matching documents
       10 through 19 and queryWindowSize is 50, documents 0 through 49
       are collected and cached, and any further request inside that
       range can be answered from the cache.
    -->
  <queryResultWindowSize>10</queryResultWindowSize>

  <!-- Maximum number of documents to cache for any single entry in
       the queryResultCache. -->
  <queryResultMaxDocsCached>100</queryResultMaxDocsCached>

  <!-- Use Cold Searcher

       When a search request arrives and no searcher is currently
       registered, "true" registers the still-warming searcher
       immediately and uses it. With "false", all requests block until
       the first searcher has finished warming.
    -->
  <useColdSearcher>false</useColdSearcher>

  <!-- Max Warming Searchers

       Maximum number of searchers that may be warming in the
       background concurrently. An error is returned when this limit
       is exceeded.

       Recommended values are 1-2 for read-only slaves and higher for
       masters without cache warming.
    -->
  <maxWarmingSearchers>4</maxWarmingSearchers>
</query>
|
|
|
|
<requestDispatcher handleSelect="true">
  <!-- Request Parsing

       Controls how Solr requests may be parsed and which restrictions
       apply to the ContentStreams of those requests.

       enableRemoteStreaming    - enables use of the stream.file and
                                  stream.url parameters for specifying
                                  remote streams.

       multipartUploadLimitInKB - the maximum size of multipart file
                                  uploads that Solr allows in a request.

       *** WARNING ***
       Turning remote streaming on authorizes Solr to fetch remote
       files. Make sure the system is protected by some form of
       authentication before using enableRemoteStreaming="true".
    -->
  <requestParsers multipartUploadLimitInKB="102400" enableRemoteStreaming="false"/>

  <!-- HTTP caching settings: lastModifiedFrom and etagSeed. -->
  <httpCaching etagSeed="Solr" lastModifiedFrom="openTime"/>
</requestDispatcher>
|
|
|
|
<!-- SearchHandler
|
|
|
|
http://wiki.apache.org/solr/SearchHandler
|
|
|
|
For processing Search Queries, the primary Request Handler
|
|
provided with Solr is "SearchHandler". It delegates to a sequence
|
|
of SearchComponents (see below) and supports distributed
|
|
queries across multiple shards
|
|
-->
|
|
<!-- Search handler registered as "standard".
     Defaults: echo only explicit parameters, ask for 500 rows. -->
<requestHandler class="solr.SearchHandler" name="standard">
  <lst name="defaults">
    <str name="echoParams">explicit</str>
    <int name="rows">500</int>
  </lst>
</requestHandler>
|
|
|
|
<!-- Search handler registered as "search" and flagged as the default
     handler. It lists the "spellcheck" component under last-components. -->
<requestHandler class="solr.SearchHandler" name="search" default="true">
  <lst name="defaults">
    <str name="echoParams">explicit</str>
    <int name="rows">500</int>
    <!-- Spellcheck request defaults. -->
    <str name="spellcheck.onlyMorePopular">false</str>
    <str name="spellcheck.extendedResults">false</str>
    <str name="spellcheck.count">1</str>
  </lst>

  <arr name="last-components">
    <str>spellcheck</str>
  </arr>
</requestHandler>
|
|
|
|
<!-- XML Update Request Handler.
|
|
|
|
http://wiki.apache.org/solr/UpdateXmlMessages
|
|
|
|
The canonical Request Handler for Modifying the Index through
|
|
commands specified using XML.
|
|
|
|
Note: Since Solr 1.1, request handlers require a valid content
|
|
type header if posted in the body. For example, curl now
|
|
requires: -H 'Content-type:text/xml; charset=utf-8'
|
|
-->
|
|
<requestHandler class="solr.XmlUpdateRequestHandler" name="/update"/>
|
|
|
|
<!-- Binary Update Request Handler
|
|
http://wiki.apache.org/solr/javabin
|
|
-->
|
|
<requestHandler startup="lazy" class="solr.BinaryUpdateRequestHandler" name="/update/javabin"/>
|
|
|
|
<!-- CSV Update Request Handler
|
|
http://wiki.apache.org/solr/UpdateCSV
|
|
-->
|
|
<requestHandler startup="lazy" class="solr.CSVRequestHandler" name="/update/csv"/>
|
|
|
|
<!-- JSON Update Request Handler
|
|
http://wiki.apache.org/solr/UpdateJSON
|
|
-->
|
|
<requestHandler class="solr.JsonUpdateRequestHandler" name="/update/json"/>
|
|
|
|
<!-- Solr Cell Update Request Handler
|
|
|
|
http://wiki.apache.org/solr/ExtractingRequestHandler
|
|
|
|
-->
|
|
<requestHandler class="solr.extraction.ExtractingRequestHandler" name="/update/extract">
  <!-- Extraction defaults: fmap.content is set to tika_content,
       lowernames is switched off and uprefix is tika_. -->
  <lst name="defaults">
    <str name="fmap.content">tika_content</str>
    <str name="lowernames">false</str>
    <str name="uprefix">tika_</str>
  </lst>
</requestHandler>
|
|
|
|
<!-- Field Analysis Request Handler
|
|
|
|
RequestHandler that provides much the same functionality as
|
|
analysis.jsp. Provides the ability to specify multiple field
|
|
types and field names in the same request and outputs
|
|
index-time and query-time analysis for each of them.
|
|
|
|
Request parameters are:
|
|
analysis.fieldname - field name whose analyzers are to be used
|
|
|
|
analysis.fieldtype - field type whose analyzers are to be used
|
|
analysis.fieldvalue - text for index-time analysis
|
|
q (or analysis.q) - text for query time analysis
|
|
analysis.showmatch (true|false) - When set to true and when
|
|
query analysis is performed, the produced tokens of the
|
|
field value analysis will be marked as "matched" for every
|
|
token that is produced by the query analysis
|
|
-->
|
|
<requestHandler name="/analysis/field" class="solr.FieldAnalysisRequestHandler" startup="lazy"/>
|
|
|
|
|
|
<!-- Document Analysis Handler
|
|
|
|
http://wiki.apache.org/solr/AnalysisRequestHandler
|
|
|
|
An analysis handler that provides a breakdown of the analysis
|
|
process of provided documents. This handler expects a (single)
|
|
content stream with the following format:
|
|
|
|
<docs>
|
|
<doc>
|
|
<field name="id">1</field>
|
|
<field name="name">The Name</field>
|
|
<field name="text">The Text Value</field>
|
|
</doc>
|
|
<doc>...</doc>
|
|
<doc>...</doc>
|
|
...
|
|
</docs>
|
|
|
|
Note: Each document must contain a field which serves as the
|
|
unique key. This key is used in the returned response to associate
|
|
an analysis breakdown to the analyzed document.
|
|
|
|
Like the FieldAnalysisRequestHandler, this handler also supports
|
|
query analysis by sending either an "analysis.query" or "q"
|
|
request parameter that holds the query text to be analyzed. It
|
|
also supports the "analysis.showmatch" parameter which when set to
|
|
true, all field tokens that match the query tokens will be marked
|
|
as a "match".
|
|
-->
|
|
<requestHandler startup="lazy" class="solr.DocumentAnalysisRequestHandler" name="/analysis/document"/>
|
|
|
|
<!-- Admin Handlers
|
|
|
|
Admin Handlers - This will register all the standard admin
|
|
RequestHandlers.
|
|
-->
|
|
<requestHandler class="solr.admin.AdminHandlers" name="/admin/"/>
|
|
|
|
<!-- ping/healthcheck -->
|
|
<requestHandler class="solr.PingRequestHandler" name="/admin/ping">
  <!-- Invariants: qt is pinned to "search" and q to "solrpingquery". -->
  <lst name="invariants">
    <str name="qt">search</str>
    <str name="q">solrpingquery</str>
  </lst>
  <!-- Default: echo all parameters. -->
  <lst name="defaults">
    <str name="echoParams">all</str>
  </lst>
</requestHandler>
|
|
|
|
<!-- Echo the request contents back to the client -->
|
|
<requestHandler class="solr.DumpRequestHandler" name="/debug/dump">
  <!-- Defaults: echo explicit parameters and the handler itself. -->
  <lst name="defaults">
    <str name="echoParams">explicit</str>
    <str name="echoHandler">true</str>
  </lst>
</requestHandler>
|
|
|
|
<!-- Spell Check
|
|
|
|
The spell check component can return a list of alternative spelling
|
|
suggestions.
|
|
|
|
http://wiki.apache.org/solr/SpellCheckComponent
|
|
-->
|
|
<searchComponent class="solr.SpellCheckComponent" name="spellcheck">
  <!-- Field type named for query analysis. -->
  <str name="queryAnalyzerFieldType">textSpell</str>

  <!-- Spellchecker named "default": uses the field "default", the
       index directory "spellchecker", a token frequency threshold of
       .001, and has buildOnOptimize enabled. -->
  <lst name="spellchecker">
    <str name="name">default</str>
    <str name="field">default</str>
    <str name="spellcheckIndexDir">spellchecker</str>
    <float name="thresholdTokenFrequency">.001</float>
    <str name="buildOnOptimize">true</str>
  </lst>
</searchComponent>
|
|
|
|
<!-- Term Vector Component
|
|
|
|
http://wiki.apache.org/solr/TermVectorComponent
|
|
-->
|
|
<!-- Registers solr.TermVectorComponent under the name "tvComponent";
     no options are configured here. -->
<searchComponent name="tvComponent" class="solr.TermVectorComponent"/>
|
|
|
|
<!-- Terms Component
|
|
|
|
http://wiki.apache.org/solr/TermsComponent
|
|
|
|
A component to return terms and document frequency of those
|
|
terms
|
|
-->
|
|
<!-- Registers solr.TermsComponent under the name "terms";
     no options are configured here. -->
<searchComponent name="terms" class="solr.TermsComponent"/>
|
|
|
|
<!-- Query Elevation Component
|
|
|
|
http://wiki.apache.org/solr/QueryElevationComponent
|
|
|
|
a search component that enables you to configure the top
|
|
results for a given query regardless of the normal lucene
|
|
scoring.
|
|
-->
|
|
<searchComponent class="solr.QueryElevationComponent" name="elevator">
  <!-- Field type used to analyze queries, and the file that holds the
       elevation configuration. -->
  <str name="queryFieldType">string</str>
  <str name="config-file">elevate.xml</str>
</searchComponent>
|
|
|
|
<!-- Highlighting Component
|
|
|
|
http://wiki.apache.org/solr/HighlightingParameters
|
|
-->
|
|
<searchComponent class="solr.HighlightComponent" name="highlight">
  <highlighting>
    <!-- Standard fragmenter ("gap"), marked as the default.
         This could most likely be commented out in the "default" case. -->
    <fragmenter name="gap"
                default="true"
                class="solr.highlight.GapFragmenter">
      <lst name="defaults">
        <int name="hl.fragsize">100</int>
      </lst>
    </fragmenter>

    <!-- A regular-expression-based fragmenter (for sentence extraction). -->
    <fragmenter name="regex"
                class="solr.highlight.RegexFragmenter">
      <lst name="defaults">
        <!-- slightly smaller fragsizes work better because of slop -->
        <int name="hl.fragsize">70</int>
        <!-- allow 50% slop on fragment sizes -->
        <float name="hl.regex.slop">0.5</float>
        <!-- a basic sentence pattern -->
        <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
      </lst>
    </fragmenter>

    <!-- Standard formatter: wraps highlighted terms in <em> tags. -->
    <formatter name="html"
               default="true"
               class="solr.highlight.HtmlFormatter">
      <lst name="defaults">
        <str name="hl.simple.pre"><![CDATA[<em>]]></str>
        <str name="hl.simple.post"><![CDATA[</em>]]></str>
      </lst>
    </formatter>

    <!-- Standard encoder. -->
    <encoder name="html"
             class="solr.highlight.HtmlEncoder"/>

    <!-- Standard fragListBuilder, marked as the default. -->
    <fragListBuilder name="simple"
                     default="true"
                     class="solr.highlight.SimpleFragListBuilder"/>

    <!-- Single fragListBuilder. -->
    <fragListBuilder name="single"
                     class="solr.highlight.SingleFragListBuilder"/>

    <!-- Default tag FragmentsBuilder. -->
    <fragmentsBuilder name="default"
                      default="true"
                      class="solr.highlight.ScoreOrderFragmentsBuilder"/>

    <!-- Multi-colored tag FragmentsBuilder.
         hl.tag.pre is a comma-separated list of opening tags, one per
         color. Every background value must be a valid CSS color name:
         the second entry is "lawngreen" (it was previously misspelled
         "lawgreen", which is not a CSS color). -->
    <fragmentsBuilder name="colored"
                      class="solr.highlight.ScoreOrderFragmentsBuilder">
      <lst name="defaults">
        <str name="hl.tag.pre"><![CDATA[
             <b style="background:yellow">,<b style="background:lawngreen">,
             <b style="background:aquamarine">,<b style="background:magenta">,
             <b style="background:palegreen">,<b style="background:coral">,
             <b style="background:wheat">,<b style="background:khaki">,
             <b style="background:lime">,<b style="background:deepskyblue">]]></str>
        <str name="hl.tag.post"><![CDATA[</b>]]></str>
      </lst>
    </fragmentsBuilder>
  </highlighting>
</searchComponent>
|
|
|
|
<!--
|
|
This example update chain identifies the language of the incoming
|
|
documents using the langid contrib. The detected language is
|
|
written to field language_s. No field name mapping is done.
|
|
The fields used for detection are text, title, subject and description,
|
|
making this example suitable for detecting languages from full-text
|
|
rich documents injected via ExtractingRequestHandler.
|
|
See more about langId at http://wiki.apache.org/solr/LanguageDetection
|
|
-->
|
|
<!--
|
|
<updateRequestProcessorChain name="langid">
|
|
<processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory">
|
|
<str name="langid.fl">text,title,subject,description</str>
|
|
<str name="langid.langField">language_s</str>
|
|
<str name="langid.fallback">en</str>
|
|
</processor>
|
|
<processor class="solr.LogUpdateProcessorFactory" />
|
|
<processor class="solr.RunUpdateProcessorFactory" />
|
|
</updateRequestProcessorChain>
|
|
-->
|
|
|
|
<!-- XSLT response writer transforms the XML output by any xslt file found
|
|
in Solr's conf/xslt directory. Changes to xslt files are checked for
|
|
every xsltCacheLifetimeSeconds.
|
|
-->
|
|
<queryResponseWriter class="solr.XSLTResponseWriter" name="xslt">
  <!-- Interval, in seconds, at which xslt files are checked for changes. -->
  <int name="xsltCacheLifetimeSeconds">5</int>
</queryResponseWriter>
|
|
|
|
<!-- Legacy config for the admin interface -->
|
|
<admin>
  <!-- Query pre-filled in the admin interface. -->
  <defaultQuery>*:*</defaultQuery>
</admin>
|
|
|
|
|
|
|
|
</config>
|