Zookeeper setup

author Dennis Schafroth <dennis@indexdata.com>

Fri, 29 Nov 2013 11:14:30 +0000 (12:14 +0100)

committer Dennis Schafroth <dennis@indexdata.com>

Fri, 29 Nov 2013 11:14:30 +0000 (12:14 +0100)
author Dennis Schafroth <dennis@indexdata.com>
Fri, 29 Nov 2013 11:14:30 +0000 (12:14 +0100)
committer Dennis Schafroth <dennis@indexdata.com>
Fri, 29 Nov 2013 11:14:30 +0000 (12:14 +0100)
diff --git a/zookeeper/.#make_same_host_config.sh b/zookeeper/.#make_same_host_config.sh

new file mode 120000 (symlink)

index 0000000..da3e3cb
--- /dev/null
+++ b/zookeeper/.#make_same_host_config.sh
@@ -0,0 +1 @@
+dennis@opencontent-solr.index.567:1381402202
\ No newline at end of file
diff --git a/zookeeper/README.txt b/zookeeper/README.txt

new file mode 100644 (file)

index 0000000..b95697f
--- /dev/null
+++ b/zookeeper/README.txt
@@ -0,0 +1,78 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+Solr example
+------------
+
+This directory contains an instance of the Jetty Servlet container set up to 
+run Solr using an example configuration.
+
+To run this example:
+
+  java -jar start.jar
+
+in this example directory, and when Solr is started connect to 
+
+  http://localhost:8983/solr/
+
+To add documents to the index, use the post.jar (or post.sh script) in
+the example/exampledocs subdirectory (while Solr is running), for example:
+
+     cd exampledocs
+     java -jar post.jar *.xml
+Or:  sh post.sh *.xml
+
+For more information about this example please read...
+
+ * example/solr/README.txt
+   For more information about the "Solr Home" and Solr specific configuration
+ * http://lucene.apache.org/solr/tutorial.html
+   For a Tutorial using this example configuration
+ * http://wiki.apache.org/solr/SolrResources 
+   For a list of other tutorials and introductory articles.
+
+Notes About These Examples
+--------------------------
+
+* SolrHome *
+
+By default, start.jar starts Solr in Jetty using the default Solr Home
+directory of "./solr/" (relative to the working directory of the servlet 
+container).  To run other example configurations, you can specify the 
+solr.solr.home system property when starting jetty...
+
+  java -Dsolr.solr.home=multicore -jar start.jar
+  java -Dsolr.solr.home=example-DIH/solr -jar start.jar
+
+* References to Jar Files Outside This Directory *
+
+Various example SolrHome dirs contained in this directory may use "<lib>"
+statements in the solrconfig.xml file to reference plugin jars outside of 
+this directory for loading "contrib" plugins via relative paths.  
+
+If you make a copy of this example server and wish to use the 
+ExtractingRequestHandler (SolrCell), DataImportHandler (DIH), UIMA, the 
+clustering component, or any other modules in "contrib", you will need to 
+copy the required jars or update the paths to those jars in your 
+solrconfig.xml.
+
+* Logging *
+
+By default, Jetty & Solr will log to the console and logs/solr.log. This can be convenient when 
+first getting started, but eventually you will want to log just to a file. To 
+configure logging, edit the log4j.properties file in "resources".
+ 
+It is also possible to set up log4j or other popular logging frameworks.
+
diff --git a/zookeeper/cloud-scripts/log4j.properties b/zookeeper/cloud-scripts/log4j.properties

new file mode 100644 (file)

index 0000000..c581583
--- /dev/null
+++ b/zookeeper/cloud-scripts/log4j.properties
@@ -0,0 +1,8 @@
+#  Logging level
+log4j.rootLogger=INFO, stderr
+
+# log to stderr
+log4j.appender.stderr = org.apache.log4j.ConsoleAppender
+log4j.appender.stderr.Target = System.err
+log4j.appender.stderr.layout = org.apache.log4j.PatternLayout
+log4j.appender.stderr.layout.ConversionPattern=%-5p - %d{yyyy-MM-dd HH:mm:ss.SSS}; %C; %m\n
diff --git a/zookeeper/cloud-scripts/zkcli.bat b/zookeeper/cloud-scripts/zkcli.bat

new file mode 100644 (file)

index 0000000..8232a72
--- /dev/null
+++ b/zookeeper/cloud-scripts/zkcli.bat
@@ -0,0 +1,11 @@
+REM You can override or pass the following parameters to this script:\r
+REM \r
+\r
+set JVM=java\r
+\r
+REM  Find location of this script\r
+\r
+set SDIR=%~dp0\r
+if "%SDIR:~-1%"=="\" set SDIR=%SDIR:~0,-1%\r
+\r
+"%JVM%" -Dlog4j.configuration=file:%SDIR%\log4j.properties -classpath "%SDIR%\..\solr-webapp\webapp\WEB-INF\lib\*;%SDIR%\..\lib\ext\*" org.apache.solr.cloud.ZkCLI %*\r
diff --git a/zookeeper/cloud-scripts/zkcli.sh b/zookeeper/cloud-scripts/zkcli.sh

new file mode 100755 (executable)

index 0000000..ab5da96
--- /dev/null
+++ b/zookeeper/cloud-scripts/zkcli.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+# You can override or pass the following parameters to this script:
+# 
+
+JVM="java"
+
+# Find location of this script
+
+sdir="`dirname \"$0\"`"
+
+PATH=$JAVA_HOME/bin:$PATH $JVM -Dlog4j.configuration=file:$sdir/log4j.properties -classpath "$sdir/../solr-webapp/webapp/WEB-INF/lib/*:$sdir/../lib/ext/*" org.apache.solr.cloud.ZkCLI ${1+"$@"}
+
diff --git a/zookeeper/contexts/solr-jetty-context.xml b/zookeeper/contexts/solr-jetty-context.xml

new file mode 100644 (file)

index 0000000..50978a3
--- /dev/null
+++ b/zookeeper/contexts/solr-jetty-context.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<!DOCTYPE Configure PUBLIC "-//Jetty//Configure//EN" "http://www.eclipse.org/jetty/configure.dtd">
+<Configure class="org.eclipse.jetty.webapp.WebAppContext">
+  <Set name="contextPath"><SystemProperty name="hostContext" default="/solr"/></Set>
+  <Set name="war"><SystemProperty name="jetty.home"/>/webapps/solr.war</Set>
+  <Set name="defaultsDescriptor"><SystemProperty name="jetty.home"/>/etc/webdefault.xml</Set>
+  <Set name="tempDirectory"><Property name="jetty.home" default="."/>/solr-webapp</Set>
+</Configure>
diff --git a/zookeeper/etc/create-solrtest.keystore.sh b/zookeeper/etc/create-solrtest.keystore.sh

new file mode 100755 (executable)

index 0000000..d3decee
--- /dev/null
+++ b/zookeeper/etc/create-solrtest.keystore.sh
@@ -0,0 +1,37 @@
+#!/bin/bash -ex
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+############
+ 
+# This script shows how the solrtest.keystore file used for solr tests 
+# and these example configs was generated.
+#
+# Running this script should only be necessary if the keystore file
+# needs to be replaced, which shouldn't be required until sometime around
+# the year 4751.
+#
+# NOTE: the "-ext" option used in the "keytool" command requires that you have
+# the java7 version of keytool, but the generated key will work with any 
+# version of java
+
+echo "### remove old keystore"
+rm -f solrtest.keystore
+
+echo "### create keystore and keys"
+keytool -keystore solrtest.keystore -storepass "secret" -alias solrtest -keypass "secret" -genkey -keyalg RSA -dname "cn=localhost, ou=SolrTest, o=lucene.apache.org, c=US" -ext "san=ip:127.0.0.1" -validity 999999
+
+
diff --git a/zookeeper/etc/jetty.xml b/zookeeper/etc/jetty.xml

new file mode 100644 (file)

index 0000000..8e7093d
--- /dev/null
+++ b/zookeeper/etc/jetty.xml
@@ -0,0 +1,205 @@
+<?xml version="1.0"?>
+<!DOCTYPE Configure PUBLIC "-//Jetty//Configure//EN" "http://www.eclipse.org/jetty/configure.dtd">
+
+<!-- =============================================================== -->
+<!-- Configure the Jetty Server                                      -->
+<!--                                                                 -->
+<!-- Documentation of this file format can be found at:              -->
+<!-- http://wiki.eclipse.org/Jetty/Reference/jetty.xml_syntax        -->
+<!--                                                                 -->
+<!-- =============================================================== -->
+
+
+<Configure id="Server" class="org.eclipse.jetty.server.Server">
+
+    <!-- =========================================================== -->
+    <!-- Server Thread Pool                                          -->
+    <!-- =========================================================== -->
+    <Set name="ThreadPool">
+      <!-- Default queued blocking threadpool -->
+      <New class="org.eclipse.jetty.util.thread.QueuedThreadPool">
+        <Set name="minThreads">10</Set>
+        <Set name="maxThreads">10000</Set>
+        <Set name="detailedDump">false</Set>
+      </New>
+    </Set>
+
+    <!-- =========================================================== -->
+    <!-- Set connectors                                              -->
+    <!-- =========================================================== -->
+
+  <!--
+    <Call name="addConnector">
+      <Arg>
+          <New class="org.eclipse.jetty.server.nio.SelectChannelConnector">
+            <Set name="host"><SystemProperty name="jetty.host" /></Set>
+            <Set name="port"><SystemProperty name="jetty.port" default="8983"/></Set>
+            <Set name="maxIdleTime">50000</Set>
+            <Set name="Acceptors">2</Set>
+            <Set name="statsOn">false</Set>
+            <Set name="confidentialPort">8443</Set>
+           <Set name="lowResourcesConnections">5000</Set>
+           <Set name="lowResourcesMaxIdleTime">5000</Set>
+          </New>
+      </Arg>
+    </Call>
+  -->
+
+    <!-- This connector is currently being used for Solr because it
+          showed better performance than nio.SelectChannelConnector
+          for typical Solr requests.  -->
+    <Call name="addConnector">
+      <Arg>
+          <New class="org.eclipse.jetty.server.bio.SocketConnector">
+            <Call class="java.lang.System" name="setProperty"> <Arg>log4j.configuration</Arg> <Arg>etc/log4j.properties</Arg> </Call>
+            <Set name="host"><SystemProperty name="jetty.host" /></Set>
+            <Set name="port"><SystemProperty name="jetty.port" default="8983"/></Set>
+            <Set name="maxIdleTime">50000</Set>
+            <Set name="lowResourceMaxIdleTime">1500</Set>
+            <Set name="statsOn">false</Set>
+          </New>
+      </Arg>
+    </Call>
+
+    <!-- if the connector below is uncommented, then jetty will also accept SSL
+         connections on port 8984, using a self signed certificate and can 
+         optionally require the client to authenticate with a certificate. 
+         (which can be the same as the server certificate)
+         
+         # Run solr example with SSL on port 8984
+         java -jar start.jar
+         # 
+         # Run post.jar so that it trusts the server cert...
+         java -Djavax.net.ssl.trustStore=../etc/solrtest.keystore -Durl=https://localhost:8984/solr/update -jar post.jar *.xml
+
+         # Run solr example with SSL requiring client certs on port 8984
+         java -Djetty.ssl.clientAuth=true -jar start.jar
+         #
+         # Run post.jar so that it trusts the server cert, 
+         # and authenticates with a client cert
+         java -Djavax.net.ssl.keyStorePassword=secret -Djavax.net.ssl.keyStore=../etc/solrtest.keystore -Djavax.net.ssl.trustStore=../etc/solrtest.keystore -Durl=https://localhost:8984/solr/update -jar post.jar *.xml
+
+    -->
+    <!--
+    <Call name="addConnector">
+      <Arg>
+        <New class="org.eclipse.jetty.server.ssl.SslSelectChannelConnector">
+          <Arg>
+            <New class="org.eclipse.jetty.http.ssl.SslContextFactory">
+              <Set name="keyStore"><SystemProperty name="jetty.home" default="."/>/etc/solrtest.keystore</Set>
+              <Set name="keyStorePassword">secret</Set>
+              <Set name="needClientAuth"><SystemProperty name="jetty.ssl.clientAuth" default="false"/></Set>
+            </New>
+          </Arg>
+          <Set name="port"><SystemProperty name="jetty.ssl.port" default="8984"/></Set>
+          <Set name="maxIdleTime">30000</Set>
+        </New>
+      </Arg>
+    </Call>
+    -->
+
+    <!-- =========================================================== -->
+    <!-- Set handler Collection Structure                            --> 
+    <!-- =========================================================== -->
+    <Set name="handler">
+      <New id="Handlers" class="org.eclipse.jetty.server.handler.HandlerCollection">
+        <Set name="handlers">
+         <Array type="org.eclipse.jetty.server.Handler">
+           <Item>
+             <New id="Contexts" class="org.eclipse.jetty.server.handler.ContextHandlerCollection"/>
+           </Item>
+           <Item>
+             <New id="DefaultHandler" class="org.eclipse.jetty.server.handler.DefaultHandler"/>
+           </Item>
+           <Item>
+             <New id="RequestLog" class="org.eclipse.jetty.server.handler.RequestLogHandler"/>
+           </Item>
+         </Array>
+        </Set>
+      </New>
+    </Set>
+    
+    <!-- =========================================================== -->
+    <!-- Configure Request Log                                       -->
+    <!-- =========================================================== -->
+    <!-- 
+    <Ref id="Handlers">
+      <Call name="addHandler">
+        <Arg>
+          <New id="RequestLog" class="org.eclipse.jetty.server.handler.RequestLogHandler">
+            <Set name="requestLog">
+              <New id="RequestLogImpl" class="org.eclipse.jetty.server.NCSARequestLog">
+                <Set name="filename">
+                   logs/request.yyyy_mm_dd.log
+                </Set>
+                <Set name="filenameDateFormat">yyyy_MM_dd</Set>
+                <Set name="retainDays">90</Set>
+                <Set name="append">true</Set>
+                <Set name="extended">false</Set>
+                <Set name="logCookies">false</Set>
+                <Set name="LogTimeZone">UTC</Set>
+              </New>
+            </Set>
+          </New>
+        </Arg>
+      </Call>
+    </Ref>
+    -->
+
+    <!-- =========================================================== -->
+    <!-- extra options                                               -->
+    <!-- =========================================================== -->
+    <Set name="stopAtShutdown">true</Set>
+    <Set name="sendServerVersion">false</Set>
+    <Set name="sendDateHeader">false</Set>
+    <Set name="gracefulShutdown">1000</Set>
+    <Set name="dumpAfterStart">false</Set>
+    <Set name="dumpBeforeStop">false</Set>
+
+
+
+
+    <Call name="addBean">
+      <Arg>
+        <New id="DeploymentManager" class="org.eclipse.jetty.deploy.DeploymentManager">
+          <Set name="contexts">
+            <Ref id="Contexts" />
+          </Set>
+          <Call name="setContextAttribute">
+            <Arg>org.eclipse.jetty.server.webapp.ContainerIncludeJarPattern</Arg>
+            <Arg>.*/servlet-api-[^/]*\.jar$</Arg>
+          </Call>
+          
+          
+          <!-- Add a customize step to the deployment lifecycle -->
+          <!-- uncomment and replace DebugBinding with your extended AppLifeCycle.Binding class 
+          <Call name="insertLifeCycleNode">
+            <Arg>deployed</Arg>
+            <Arg>starting</Arg>
+            <Arg>customise</Arg>
+          </Call>
+          <Call name="addLifeCycleBinding">
+            <Arg>
+              <New class="org.eclipse.jetty.deploy.bindings.DebugBinding">
+                <Arg>customise</Arg>
+              </New>
+            </Arg>
+          </Call>
+          -->
+          
+        </New>
+      </Arg>
+    </Call>
+    
+    <Ref id="DeploymentManager">
+      <Call name="addAppProvider">
+        <Arg>
+          <New class="org.eclipse.jetty.deploy.providers.ContextProvider">
+            <Set name="monitoredDirName"><SystemProperty name="jetty.home" default="."/>/contexts</Set>
+            <Set name="scanInterval">0</Set>
+          </New>
+        </Arg>
+      </Call>
+    </Ref>
+
+</Configure>
diff --git a/zookeeper/etc/logging.properties b/zookeeper/etc/logging.properties

new file mode 100644 (file)

index 0000000..063b36f
--- /dev/null
+++ b/zookeeper/etc/logging.properties
@@ -0,0 +1,38 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# To use this log config, start solr with the following system property: 
+# -Djava.util.logging.config.file=etc/logging.properties
+
+## Default global logging level:
+.level = INFO
+
+## Log every update command (add, delete, commit, ...)
+#org.apache.solr.update.processor.LogUpdateProcessor.level = FINE
+
+## Where to log (space separated list).
+handlers = java.util.logging.FileHandler
+
+java.util.logging.FileHandler.level = FINE
+
+java.util.logging.FileHandler.formatter = java.util.logging.SimpleFormatter
+
+# 1 GB limit per file
+java.util.logging.FileHandler.limit = 1073741824
+
+# Log to the logs directory, with log files named solrxxx.log
+java.util.logging.FileHandler.pattern = ./logs/solr%u.log
\ No newline at end of file
diff --git a/zookeeper/etc/solrtest.keystore b/zookeeper/etc/solrtest.keystore

new file mode 100644 (file)

index 0000000..bcc6ec0

Binary files /dev/null and b/zookeeper/etc/solrtest.keystore differ
diff --git a/zookeeper/etc/webdefault.xml b/zookeeper/etc/webdefault.xml

new file mode 100644 (file)

index 0000000..213138b
--- /dev/null
+++ b/zookeeper/etc/webdefault.xml
@@ -0,0 +1,527 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+
+  <!-- ===================================================================== -->
+  <!-- This file contains the default descriptor for web applications.       -->
+  <!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -->
+  <!-- The intent of this descriptor is to include jetty specific or common  -->
+  <!-- configuration for all webapps.   If a context has a webdefault.xml    -->
+  <!-- descriptor, it is applied before the context's own web.xml file       -->
+  <!--                                                                       -->
+  <!-- A context may be assigned a default descriptor by:                    -->
+  <!--  + Calling WebApplicationContext.setDefaultsDescriptor                -->
+  <!--  + Passed an arg to addWebApplications                                -->
+  <!--                                                                       -->
+  <!-- This file is used both as the resource within the jetty.jar (which is -->
+  <!-- used as the default if no explicit defaults descriptor is set) and it -->
+  <!-- is copied to the etc directory of the Jetty distro and explicitly     -->
+  <!-- referenced by the jetty.xml file.                                     -->
+  <!--                                                                       -->
+  <!-- ===================================================================== -->
+<web-app
+  xmlns="http://java.sun.com/xml/ns/javaee"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd"
+  metadata-complete="true"
+  version="2.5"
+>
+
+  <description>
+    Default web.xml file.  
+    This file is applied to a Web application before it's own WEB_INF/web.xml file
+  </description>
+
+  <!-- ==================================================================== -->
+  <!-- Removes static references to beans from javax.el.BeanELResolver to   -->
+  <!-- ensure webapp classloader can be released on undeploy                -->
+  <!-- ==================================================================== -->
+  <listener>
+   <listener-class>org.eclipse.jetty.servlet.listener.ELContextCleaner</listener-class>
+  </listener>
+  
+  <!-- ==================================================================== -->
+  <!-- Removes static cache of Methods from java.beans.Introspector to      -->
+  <!-- ensure webapp classloader can be released on undeploy                -->
+  <!-- ==================================================================== -->  
+  <listener>
+   <listener-class>org.eclipse.jetty.servlet.listener.IntrospectorCleaner</listener-class>
+  </listener>
+  
+
+  <!-- ==================================================================== -->
+  <!-- Context params to control Session Cookies                            -->
+  <!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  -->
+  <!--
+    UNCOMMENT TO ACTIVATE <context-param> <param-name>org.eclipse.jetty.servlet.SessionDomain</param-name> <param-value>127.0.0.1</param-value> </context-param> <context-param>
+    <param-name>org.eclipse.jetty.servlet.SessionPath</param-name> <param-value>/</param-value> </context-param> <context-param> <param-name>org.eclipse.jetty.servlet.MaxAge</param-name>
+    <param-value>-1</param-value> </context-param>
+  -->
+
+  <!-- ==================================================================== -->
+  <!-- The default servlet.                                                 -->
+  <!-- This servlet, normally mapped to /, provides the handling for static -->
+  <!-- content, OPTIONS and TRACE methods for the context.                  -->
+  <!-- The following initParameters are supported:                          -->
+  <!--  
+ *  acceptRanges      If true, range requests and responses are
+ *                    supported
+ *
+ *  dirAllowed        If true, directory listings are returned if no
+ *                    welcome file is found. Else 403 Forbidden.
+ *
+ *  welcomeServlets   If true, attempt to dispatch to welcome files
+ *                    that are servlets, but only after no matching static
+ *                    resources could be found. If false, then a welcome
+ *                    file must exist on disk. If "exact", then exact
+ *                    servlet matches are supported without an existing file.
+ *                    Default is true.
+ *
+ *                    This must be false if you want directory listings,
+ *                    but have index.jsp in your welcome file list.
+ *
+ *  redirectWelcome   If true, welcome files are redirected rather than
+ *                    forwarded to.
+ *
+ *  gzip              If set to true, then static content will be served as
+ *                    gzip content encoded if a matching resource is
+ *                    found ending with ".gz"
+ *
+ *  resourceBase      Set to replace the context resource base
+ *
+ *  resourceCache     If set, this is a context attribute name, which the servlet 
+ *                    will use to look for a shared ResourceCache instance. 
+ *                        
+ *  relativeResourceBase
+ *                    Set with a pathname relative to the base of the
+ *                    servlet context root. Useful for serving static content out
+ *                    of only specific subdirectories.
+ *
+ *  aliases           If True, aliases of resources are allowed (eg. symbolic
+ *                    links and caps variations). May bypass security constraints.
+ *
+ *  maxCacheSize      The maximum total size of the cache or 0 for no cache.
+ *  maxCachedFileSize The maximum size of a file to cache
+ *  maxCachedFiles    The maximum number of files to cache
+ *
+ *  useFileMappedBuffer
+ *                    If set to true, it will use mapped file buffer to serve static content
+ *                    when using NIO connector. Setting this value to false means that
+ *                    a direct buffer will be used instead of a mapped file buffer.
+ *                    By default, this is set to true.
+ *
+ *  cacheControl      If set, all static content will have this value set as the cache-control
+ *                    header.
+ -->
+ 
+ 
+  <!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  -->
+  <servlet>
+    <servlet-name>default</servlet-name>
+    <servlet-class>org.eclipse.jetty.servlet.DefaultServlet</servlet-class>
+    <init-param>
+      <param-name>aliases</param-name>
+      <param-value>false</param-value>
+    </init-param>
+    <init-param>
+      <param-name>acceptRanges</param-name>
+      <param-value>true</param-value>
+    </init-param>
+    <init-param>
+      <param-name>dirAllowed</param-name>
+      <param-value>true</param-value>
+    </init-param>
+    <init-param>
+      <param-name>welcomeServlets</param-name>
+      <param-value>false</param-value>
+    </init-param>
+    <init-param>
+      <param-name>redirectWelcome</param-name>
+      <param-value>false</param-value>
+    </init-param>
+    <init-param>
+      <param-name>maxCacheSize</param-name>
+      <param-value>256000000</param-value>
+    </init-param>
+    <init-param>
+      <param-name>maxCachedFileSize</param-name>
+      <param-value>200000000</param-value>
+    </init-param>
+    <init-param>
+      <param-name>maxCachedFiles</param-name>
+      <param-value>2048</param-value>
+    </init-param>
+    <init-param>
+      <param-name>gzip</param-name>
+      <param-value>true</param-value>
+    </init-param>
+    <init-param>
+      <param-name>useFileMappedBuffer</param-name>
+      <param-value>true</param-value>
+    </init-param>
+    <!--
+    <init-param>
+      <param-name>resourceCache</param-name>
+      <param-value>resourceCache</param-value>
+    </init-param>
+    -->
+    <!--
+    <init-param>
+      <param-name>cacheControl</param-name>
+      <param-value>max-age=3600,public</param-value>
+    </init-param>
+    -->
+    <load-on-startup>0</load-on-startup>
+  </servlet>
+
+  <servlet-mapping>
+    <servlet-name>default</servlet-name>
+    <url-pattern>/</url-pattern>
+  </servlet-mapping>
+
+
+  <!-- ==================================================================== -->
+  <!-- JSP Servlet                                                          -->
+  <!-- This is the jasper JSP servlet from the jakarta project              -->
+  <!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  -->
+  <!-- The JSP page compiler and execution servlet, which is the mechanism  -->
+  <!-- used by Glassfish to support JSP pages.  Traditionally, this servlet -->
+  <!-- is mapped to URL pattern "*.jsp".  This servlet supports the         -->
+  <!-- following initialization parameters (default values are in square    -->
+  <!-- brackets):                                                           -->
+  <!--                                                                      -->
+  <!--   checkInterval       If development is false and reloading is true, -->
+  <!--                       background compiles are enabled. checkInterval -->
+  <!--                       is the time in seconds between checks to see   -->
+  <!--                       if a JSP page needs to be recompiled. [300]    -->
+  <!--                                                                      -->
+  <!--   compiler            Which compiler Ant should use to compile JSP   -->
+  <!--                       pages.  See the Ant documentation for more     -->
+  <!--                       information. [javac]                           -->
+  <!--                                                                      -->
+  <!--   classdebuginfo      Should the class file be compiled with         -->
+  <!--                       debugging information?  [true]                 -->
+  <!--                                                                      -->
+  <!--   classpath           What class path should I use while compiling   -->
+  <!--                       generated servlets?  [Created dynamically      -->
+  <!--                       based on the current web application]          -->
+  <!--                       Set to ? to make the container explicitly set  -->
+  <!--                       this parameter.                                -->
+  <!--                                                                      -->
+  <!--   development         Is Jasper used in development mode (will check -->
+  <!--                       for JSP modification on every access)?  [true] -->
+  <!--                                                                      -->
+  <!--   enablePooling       Determines whether tag handler pooling is      -->
+  <!--                       enabled  [true]                                -->
+  <!--                                                                      -->
+  <!--   fork                Tell Ant to fork compiles of JSP pages so that -->
+  <!--                       a separate JVM is used for JSP page compiles   -->
+  <!--                       from the one Tomcat is running in. [true]      -->
+  <!--                                                                      -->
+  <!--   ieClassId           The class-id value to be sent to Internet      -->
+  <!--                       Explorer when using <jsp:plugin> tags.         -->
+  <!--                       [clsid:8AD9C840-044E-11D1-B3E9-00805F499D93]   -->
+  <!--                                                                      -->
+  <!--   javaEncoding        Java file encoding to use for generating java  -->
+  <!--                       source files. [UTF-8]                          -->
+  <!--                                                                      -->
+  <!--   keepgenerated       Should we keep the generated Java source code  -->
+  <!--                       for each page instead of deleting it? [true]   -->
+  <!--                                                                      -->
+  <!--   logVerbosityLevel   The level of detailed messages to be produced  -->
+  <!--                       by this servlet.  Increasing levels cause the  -->
+  <!--                       generation of more messages.  Valid values are -->
+  <!--                       FATAL, ERROR, WARNING, INFORMATION, and DEBUG. -->
+  <!--                       [WARNING]                                      -->
+  <!--                                                                      -->
+  <!--   mappedfile          Should we generate static content with one     -->
+  <!--                       print statement per input line, to ease        -->
+  <!--                       debugging?  [false]                            -->
+  <!--                                                                      -->
+  <!--                                                                      -->
+  <!--   reloading           Should Jasper check for modified JSPs?  [true] -->
+  <!--                                                                      -->
+  <!--   suppressSmap        Should the generation of SMAP info for JSR45   -->
+  <!--                       debugging be suppressed?  [false]              -->
+  <!--                                                                      -->
+  <!--   dumpSmap            Should the SMAP info for JSR45 debugging be    -->
+  <!--                       dumped to a file? [false]                      -->
+  <!--                       False if suppressSmap is true                  -->
+  <!--                                                                      -->
+  <!--   scratchdir          What scratch directory should we use when      -->
+  <!--                       compiling JSP pages?  [default work directory  -->
+  <!--                       for the current web application]               -->
+  <!--                                                                      -->
+  <!--   tagpoolMaxSize      The maximum tag handler pool size  [5]         -->
+  <!--                                                                      -->
+  <!--   xpoweredBy          Determines whether X-Powered-By response       -->
+  <!--                       header is added by generated servlet  [false]  -->
+  <!--                                                                      -->
+  <!-- If you wish to use Jikes to compile JSP pages:                       -->
+  <!--   Set the init parameter "compiler" to "jikes".  Define              -->
+  <!--   the property "-Dbuild.compiler.emacs=true" when starting Jetty     -->
+  <!--   to cause Jikes to emit error messages in a format compatible with  -->
+  <!--   Jasper.                                                            -->
+  <!--   If you get an error reporting that jikes can't use UTF-8 encoding, -->
+  <!--   try setting the init parameter "javaEncoding" to "ISO-8859-1".     -->
+  <!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  -->
+  <servlet
+    id="jsp"
+  >
+    <servlet-name>jsp</servlet-name>
+    <servlet-class>org.apache.jasper.servlet.JspServlet</servlet-class>
+    <init-param>
+      <param-name>logVerbosityLevel</param-name>
+      <param-value>DEBUG</param-value>
+    </init-param>
+    <init-param>
+      <param-name>fork</param-name>
+      <param-value>false</param-value>
+    </init-param>
+    <init-param>
+      <param-name>xpoweredBy</param-name>
+      <param-value>false</param-value>
+    </init-param>
+    <!--  
+    <init-param>
+        <param-name>classpath</param-name>
+        <param-value>?</param-value>
+    </init-param>
+    -->
+    <load-on-startup>0</load-on-startup>
+  </servlet>
+
+  <servlet-mapping>
+    <servlet-name>jsp</servlet-name>
+    <url-pattern>*.jsp</url-pattern>
+    <url-pattern>*.jspf</url-pattern>
+    <url-pattern>*.jspx</url-pattern>
+    <url-pattern>*.xsp</url-pattern>
+    <url-pattern>*.JSP</url-pattern>
+    <url-pattern>*.JSPF</url-pattern>
+    <url-pattern>*.JSPX</url-pattern>
+    <url-pattern>*.XSP</url-pattern>
+  </servlet-mapping>
+
+  <!-- ==================================================================== -->
+  <!-- Dynamic Servlet Invoker.                                             -->
+  <!-- This servlet invokes anonymous servlets that have not been defined   -->
+  <!-- in the web.xml or by other means. The first element of the pathInfo  -->
+  <!-- of a request passed to the invoker is treated as a servlet name for  -->
+  <!-- an existing servlet, or as a class name of a new servlet.            -->
+  <!-- This servlet is normally mapped to /servlet/*                        -->
+  <!-- This servlet supports the following initParams:                      -->
+  <!--                                                                      -->
+  <!--  nonContextServlets       If false, the invoker can only load        -->
+  <!--                           servlets from the contexts classloader.    -->
+  <!--                           This is false by default and setting this  -->
+  <!--                           to true may have security implications.    -->
+  <!--                                                                      -->
+  <!--  verbose                  If true, log dynamic loads                 -->
+  <!--                                                                      -->
+  <!--  *                        All other parameters are copied to         -->
+  <!--                           each dynamic servlet as init parameters    -->
+  <!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  -->
+  <!--
+    Uncomment for dynamic invocation <servlet> <servlet-name>invoker</servlet-name> <servlet-class>org.eclipse.jetty.servlet.Invoker</servlet-class> <init-param> <param-name>verbose</param-name>
+    <param-value>false</param-value> </init-param> <init-param> <param-name>nonContextServlets</param-name> <param-value>false</param-value> </init-param> <init-param>
+    <param-name>dynamicParam</param-name> <param-value>anyValue</param-value> </init-param> <load-on-startup>0</load-on-startup> </servlet> <servlet-mapping> <servlet-name>invoker</servlet-name>
+    <url-pattern>/servlet/*</url-pattern> </servlet-mapping>
+  -->
+
+
+
+  <!-- ==================================================================== -->
+  <session-config>
+    <session-timeout>30</session-timeout>
+  </session-config>
+
+  <!-- ==================================================================== -->
+  <!-- Default MIME mappings                                                -->
+  <!-- The default MIME mappings are provided by the mime.properties        -->
+  <!-- resource in the org.eclipse.jetty.server.jar file.  Additional or modified  -->
+  <!-- mappings may be specified here                                       -->
+  <!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  -->
+  <!-- UNCOMMENT TO ACTIVATE
+  <mime-mapping>
+    <extension>mysuffix</extension>
+    <mime-type>mymime/type</mime-type>
+  </mime-mapping>
+  -->
+
+  <!-- ==================================================================== -->
+  <welcome-file-list>
+    <welcome-file>index.html</welcome-file>
+    <welcome-file>index.htm</welcome-file>
+    <welcome-file>index.jsp</welcome-file>
+  </welcome-file-list>
+
+  <!-- ==================================================================== -->
+  <locale-encoding-mapping-list>
+    <locale-encoding-mapping>
+      <locale>ar</locale>
+      <encoding>ISO-8859-6</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>be</locale>
+      <encoding>ISO-8859-5</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>bg</locale>
+      <encoding>ISO-8859-5</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>ca</locale>
+      <encoding>ISO-8859-1</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>cs</locale>
+      <encoding>ISO-8859-2</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>da</locale>
+      <encoding>ISO-8859-1</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>de</locale>
+      <encoding>ISO-8859-1</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>el</locale>
+      <encoding>ISO-8859-7</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>en</locale>
+      <encoding>ISO-8859-1</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>es</locale>
+      <encoding>ISO-8859-1</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>et</locale>
+      <encoding>ISO-8859-1</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>fi</locale>
+      <encoding>ISO-8859-1</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>fr</locale>
+      <encoding>ISO-8859-1</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>hr</locale>
+      <encoding>ISO-8859-2</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>hu</locale>
+      <encoding>ISO-8859-2</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>is</locale>
+      <encoding>ISO-8859-1</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>it</locale>
+      <encoding>ISO-8859-1</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>iw</locale>
+      <encoding>ISO-8859-8</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>ja</locale>
+      <encoding>Shift_JIS</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>ko</locale>
+      <encoding>EUC-KR</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>lt</locale>
+      <encoding>ISO-8859-2</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>lv</locale>
+      <encoding>ISO-8859-2</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>mk</locale>
+      <encoding>ISO-8859-5</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>nl</locale>
+      <encoding>ISO-8859-1</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>no</locale>
+      <encoding>ISO-8859-1</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>pl</locale>
+      <encoding>ISO-8859-2</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>pt</locale>
+      <encoding>ISO-8859-1</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>ro</locale>
+      <encoding>ISO-8859-2</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>ru</locale>
+      <encoding>ISO-8859-5</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>sh</locale>
+      <encoding>ISO-8859-5</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>sk</locale>
+      <encoding>ISO-8859-2</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>sl</locale>
+      <encoding>ISO-8859-2</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>sq</locale>
+      <encoding>ISO-8859-2</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>sr</locale>
+      <encoding>ISO-8859-5</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>sv</locale>
+      <encoding>ISO-8859-1</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>tr</locale>
+      <encoding>ISO-8859-9</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>uk</locale>
+      <encoding>ISO-8859-5</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>zh</locale>
+      <encoding>GB2312</encoding>
+    </locale-encoding-mapping>
+    <locale-encoding-mapping>
+      <locale>zh_TW</locale>
+      <encoding>Big5</encoding>
+    </locale-encoding-mapping>
+  </locale-encoding-mapping-list>
+
+  <security-constraint>
+    <web-resource-collection>
+      <web-resource-name>Disable TRACE</web-resource-name>
+      <url-pattern>/</url-pattern>
+      <http-method>TRACE</http-method>
+    </web-resource-collection>
+    <auth-constraint/>
+  </security-constraint>
+
+</web-app>
+
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/currency.xml b/zookeeper/example-schemaless/solr/collection1/conf/currency.xml

new file mode 100644 (file)

index 0000000..3a9c58a
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/currency.xml
@@ -0,0 +1,67 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Example exchange rates file for CurrencyField type named "currency" in example schema -->
+
+<currencyConfig version="1.0">
+  <rates>
+    <!-- Updated from http://www.exchangerate.com/ at 2011-09-27 -->
+    <rate from="USD" to="ARS" rate="4.333871" comment="ARGENTINA Peso" />
+    <rate from="USD" to="AUD" rate="1.025768" comment="AUSTRALIA Dollar" />
+    <rate from="USD" to="EUR" rate="0.743676" comment="European Euro" />
+    <rate from="USD" to="BRL" rate="1.881093" comment="BRAZIL Real" />
+    <rate from="USD" to="CAD" rate="1.030815" comment="CANADA Dollar" />
+    <rate from="USD" to="CLP" rate="519.0996" comment="CHILE Peso" />
+    <rate from="USD" to="CNY" rate="6.387310" comment="CHINA Yuan" />
+    <rate from="USD" to="CZK" rate="18.47134" comment="CZECH REP. Koruna" />
+    <rate from="USD" to="DKK" rate="5.515436" comment="DENMARK Krone" />
+    <rate from="USD" to="HKD" rate="7.801922" comment="HONG KONG Dollar" />
+    <rate from="USD" to="HUF" rate="215.6169" comment="HUNGARY Forint" />
+    <rate from="USD" to="ISK" rate="118.1280" comment="ICELAND Krona" />
+    <rate from="USD" to="INR" rate="49.49088" comment="INDIA Rupee" />
+    <rate from="USD" to="XDR" rate="0.641358" comment="INTNL MON. FUND SDR" />
+    <rate from="USD" to="ILS" rate="3.709739" comment="ISRAEL Sheqel" />
+    <rate from="USD" to="JPY" rate="76.32419" comment="JAPAN Yen" />
+    <rate from="USD" to="KRW" rate="1169.173" comment="KOREA (SOUTH) Won" />
+    <rate from="USD" to="KWD" rate="0.275142" comment="KUWAIT Dinar" />
+    <rate from="USD" to="MXN" rate="13.85895" comment="MEXICO Peso" />
+    <rate from="USD" to="NZD" rate="1.285159" comment="NEW ZEALAND Dollar" />
+    <rate from="USD" to="NOK" rate="5.859035" comment="NORWAY Krone" />
+    <rate from="USD" to="PKR" rate="87.57007" comment="PAKISTAN Rupee" />
+    <rate from="USD" to="PEN" rate="2.730683" comment="PERU Sol" />
+    <rate from="USD" to="PHP" rate="43.62039" comment="PHILIPPINES Peso" />
+    <rate from="USD" to="PLN" rate="3.310139" comment="POLAND Zloty" />
+    <rate from="USD" to="RON" rate="3.100932" comment="ROMANIA Leu" />
+    <rate from="USD" to="RUB" rate="32.14663" comment="RUSSIA Ruble" />
+    <rate from="USD" to="SAR" rate="3.750465" comment="SAUDI ARABIA Riyal" />
+    <rate from="USD" to="SGD" rate="1.299352" comment="SINGAPORE Dollar" />
+    <rate from="USD" to="ZAR" rate="8.329761" comment="SOUTH AFRICA Rand" />
+    <rate from="USD" to="SEK" rate="6.883442" comment="SWEDEN Krona" />
+    <rate from="USD" to="CHF" rate="0.906035" comment="SWITZERLAND Franc" />
+    <rate from="USD" to="TWD" rate="30.40283" comment="TAIWAN Dollar" />
+    <rate from="USD" to="THB" rate="30.89487" comment="THAILAND Baht" />
+    <rate from="USD" to="AED" rate="3.672955" comment="U.A.E. Dirham" />
+    <rate from="USD" to="UAH" rate="7.988582" comment="UKRAINE Hryvnia" />
+    <rate from="USD" to="GBP" rate="0.647910" comment="UNITED KINGDOM Pound" />
+    
+    <!-- Cross-rates for some common currencies -->
+    <rate from="EUR" to="GBP" rate="0.869914" />  
+    <rate from="EUR" to="NOK" rate="7.800095" />  
+    <rate from="GBP" to="NOK" rate="8.966508" />  
+  </rates>
+</currencyConfig>
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/elevate.xml b/zookeeper/example-schemaless/solr/collection1/conf/elevate.xml

new file mode 100644 (file)

index 0000000..25d5ceb
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/elevate.xml
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- If this file is found in the config directory, it will only be
+     loaded once at startup.  If it is found in Solr's data
+     directory, it will be re-loaded every commit.
+
+   See http://wiki.apache.org/solr/QueryElevationComponent for more info
+
+-->
+<elevate>
+ <query text="foo bar">
+  <doc id="1" />
+  <doc id="2" />
+  <doc id="3" />
+ </query>
+ 
+ <query text="ipod">
+   <doc id="MA147LL/A" />  <!-- put the actual ipod at the top -->
+   <doc id="IW-02" exclude="true" /> <!-- exclude this cable -->
+ </query>
+ 
+</elevate>
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_ca.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_ca.txt

new file mode 100644 (file)

index 0000000..307a85f
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_ca.txt
@@ -0,0 +1,8 @@
+# Set of Catalan contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+d
+l
+m
+n
+s
+t
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_fr.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_fr.txt

new file mode 100644 (file)

index 0000000..f1bba51
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_fr.txt
@@ -0,0 +1,15 @@
+# Set of French contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+l
+m
+t
+qu
+n
+s
+j
+d
+c
+jusqu
+quoiqu
+lorsqu
+puisqu
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_ga.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_ga.txt

new file mode 100644 (file)

index 0000000..9ebe7fa
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_ga.txt
@@ -0,0 +1,5 @@
+# Set of Irish contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+d
+m
+b
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_it.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_it.txt

new file mode 100644 (file)

index 0000000..cac0409
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_it.txt
@@ -0,0 +1,23 @@
+# Set of Italian contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+c
+l 
+all 
+dall 
+dell 
+nell 
+sull 
+coll 
+pell 
+gl 
+agl 
+dagl 
+degl 
+negl 
+sugl 
+un 
+m 
+t 
+s 
+v 
+d
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/hyphenations_ga.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/hyphenations_ga.txt

new file mode 100644 (file)

index 0000000..4d2642c
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/hyphenations_ga.txt
@@ -0,0 +1,5 @@
+# Set of Irish hyphenations for StopFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+h
+n
+t
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stemdict_nl.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stemdict_nl.txt

new file mode 100644 (file)

index 0000000..4410729
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stemdict_nl.txt
@@ -0,0 +1,6 @@
+# Set of overrides for the dutch stemmer
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+fiets  fiets
+bromfiets      bromfiets
+ei     eier
+kind   kinder
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stoptags_ja.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stoptags_ja.txt

new file mode 100644 (file)

index 0000000..71b7508
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stoptags_ja.txt
@@ -0,0 +1,420 @@
+#
+# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
+#
+# Any token with a part-of-speech tag that exactly matches one of those defined in
+# this file is removed from the token stream.
+#
+# Set your own stoptags by uncommenting the lines below.  Note that comments are
+# not allowed on the same line as a stoptag.  See LUCENE-3745 for frequency lists,
+# etc. that can be useful for building your own stoptag set.
+#
+# The entire possible tagset is provided below for convenience.
+#
+#####
+#  noun: unclassified nouns
+#名詞
+#
+#  noun-common: Common nouns or nouns where the sub-classification is undefined
+#名詞-一般
+#
+#  noun-proper: Proper nouns where the sub-classification is undefined 
+#名詞-固有名詞
+#
+#  noun-proper-misc: miscellaneous proper nouns
+#名詞-固有名詞-一般
+#
+#  noun-proper-person: Personal names where the sub-classification is undefined
+#名詞-固有名詞-人名
+#
+#  noun-proper-person-misc: names that cannot be divided into surname and 
+#  given name; foreign names; names where the surname or given name is unknown.
+#  e.g. お市の方
+#名詞-固有名詞-人名-一般
+#
+#  noun-proper-person-surname: Mainly Japanese surnames.
+#  e.g. 山田
+#名詞-固有名詞-人名-姓
+#
+#  noun-proper-person-given_name: Mainly Japanese given names.
+#  e.g. 太郎
+#名詞-固有名詞-人名-名
+#
+#  noun-proper-organization: Names representing organizations.
+#  e.g. 通産省, NHK
+#名詞-固有名詞-組織
+#
+#  noun-proper-place: Place names where the sub-classification is undefined
+#名詞-固有名詞-地域
+#
+#  noun-proper-place-misc: Place names excluding countries.
+#  e.g. アジア, バルセロナ, 京都
+#名詞-固有名詞-地域-一般
+#
+#  noun-proper-place-country: Country names. 
+#  e.g. 日本, オーストラリア
+#名詞-固有名詞-地域-国
+#
+#  noun-pronoun: Pronouns where the sub-classification is undefined
+#名詞-代名詞
+#
+#  noun-pronoun-misc: miscellaneous pronouns: 
+#  e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ
+#名詞-代名詞-一般
+#
+#  noun-pronoun-contraction: Spoken language contraction made by combining a 
+#  pronoun and the particle 'wa'.
+#  e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ 
+#名詞-代名詞-縮約
+#
+#  noun-adverbial: Temporal nouns such as names of days or months that behave 
+#  like adverbs. Nouns that represent amount or ratios and can be used adverbially,
+#  e.g. 金曜, 一月, 午後, 少量
+#名詞-副詞可能
+#
+#  noun-verbal: Nouns that take arguments with case and can appear followed by 
+#  'suru' and related verbs (する, できる, なさる, くださる)
+#  e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り
+#名詞-サ変接続
+#
+#  noun-adjective-base: The base form of adjectives, words that appear before な ("na")
+#  e.g. 健康, 安易, 駄目, だめ
+#名詞-形容動詞語幹
+#
+#  noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数.
+#  e.g. 0, 1, 2, 何, 数, 幾
+#名詞-数
+#
+#  noun-affix: noun affixes where the sub-classification is undefined
+#名詞-非自立
+#
+#  noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that 
+#  attach to the base form of inflectional words, words that cannot be classified 
+#  into any of the other categories below. This category includes indefinite nouns.
+#  e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, 
+#       順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, 
+#       拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳,
+#       わり, 割り, 割, ん-口語/, もん-口語/
+#名詞-非自立-一般
+#
+#  noun-affix-adverbial: noun affixes that can behave as adverbs.
+#  e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, 
+#       上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, 
+#       最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, 
+#       とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, 
+#       儘, 侭, みぎり, 矢先
+#名詞-非自立-副詞可能
+#
+#  noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars 
+#  with the stem よう(だ) ("you(da)").
+#  e.g.  よう, やう, 様 (よう)
+#名詞-非自立-助動詞語幹
+#  
+#  noun-affix-adjective-base: noun affixes that can connect to the indeclinable
+#  connection form な (aux "da").
+#  e.g. みたい, ふう
+#名詞-非自立-形容動詞語幹
+#
+#  noun-special: special nouns where the sub-classification is undefined.
+#名詞-特殊
+#
+#  noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is 
+#  treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base 
+#  form of inflectional words.
+#  e.g. そう
+#名詞-特殊-助動詞語幹
+#
+#  noun-suffix: noun suffixes where the sub-classification is undefined.
+#名詞-接尾
+#
+#  noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect 
+#  to ガル or タイ and can combine into compound nouns, words that cannot be classified into
+#  any of the other categories below. In general, this category is more inclusive than 
+#  接尾語 ("suffix") and is usually the last element in a compound noun.
+#  e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (～した) さ, 次第, 済 (ず) み,
+#       よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用
+#名詞-接尾-一般
+#
+#  noun-suffix-person: Suffixes that form nouns and attach to person names more often
+#  than other nouns.
+#  e.g. 君, 様, 著
+#名詞-接尾-人名
+#
+#  noun-suffix-place: Suffixes that form nouns and attach to place names more often 
+#  than other nouns.
+#  e.g. 町, 市, 県
+#名詞-接尾-地域
+#
+#  noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that 
+#  can appear before スル ("suru").
+#  e.g. 化, 視, 分け, 入り, 落ち, 買い
+#名詞-接尾-サ変接続
+#
+#  noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, 
+#  is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the 
+#  conjunctive form of inflectional words.
+#  e.g. そう
+#名詞-接尾-助動詞語幹
+#
+#  noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive 
+#  form of inflectional words and appear before the copula だ ("da").
+#  e.g. 的, げ, がち
+#名詞-接尾-形容動詞語幹
+#
+#  noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
+#  e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ)
+#名詞-接尾-副詞可能
+#
+#  noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category 
+#  is more inclusive than 助数詞 ("classifier") and includes common nouns that attach 
+#  to numbers.
+#  e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半
+#名詞-接尾-助数詞
+#
+#  noun-suffix-special: Special suffixes that mainly attach to inflecting words.
+#  e.g. (楽し) さ, (考え) 方
+#名詞-接尾-特殊
+#
+#  noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words 
+#  together.
+#  e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦)
+#名詞-接続詞的
+#
+#  noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are 
+#  semantically verb-like.
+#  e.g. ごらん, ご覧, 御覧, 頂戴
+#名詞-動詞非自立的
+#
+#  noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, 
+#  dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") 
+#  is いわく ("iwaku").
+#名詞-引用文字列
+#
+#  noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and
+#  behave like an adjective.
+#  e.g. 申し訳, 仕方, とんでも, 違い
+#名詞-ナイ形容詞語幹
+#
+#####
+#  prefix: unclassified prefixes
+#接頭詞
+#
+#  prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) 
+#  excluding numerical expressions.
+#  e.g. お (水), 某 (氏), 同 (社), 故 (～氏), 高 (品質), お (見事), ご (立派)
+#接頭詞-名詞接続
+#
+#  prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
+#  in conjunctive form followed by なる/なさる/くださる.
+#  e.g. お (読みなさい), お (座り)
+#接頭詞-動詞接続
+#
+#  prefix-adjectival: Prefixes that attach to adjectives.
+#  e.g. お (寒いですねえ), バカ (でかい)
+#接頭詞-形容詞接続
+#
+#  prefix-numerical: Prefixes that attach to numerical expressions.
+#  e.g. 約, およそ, 毎時
+#接頭詞-数接続
+#
+#####
+#  verb: unclassified verbs
+#動詞
+#
+#  verb-main:
+#動詞-自立
+#
+#  verb-auxiliary:
+#動詞-非自立
+#
+#  verb-suffix:
+#動詞-接尾
+#
+#####
+#  adjective: unclassified adjectives
+#形容詞
+#
+#  adjective-main:
+#形容詞-自立
+#
+#  adjective-auxiliary:
+#形容詞-非自立
+#
+#  adjective-suffix:
+#形容詞-接尾
+#
+#####
+#  adverb: unclassified adverbs
+#副詞
+#
+#  adverb-misc: Words that can be segmented into one unit and where adnominal 
+#  modification is not possible.
+#  e.g. あいかわらず, 多分
+#副詞-一般
+#
+#  adverb-particle_conjunction: Adverbs that can be followed by の, は, に, 
+#  な, する, だ, etc.
+#  e.g. こんなに, そんなに, あんなに, なにか, なんでも
+#副詞-助詞類接続
+#
+#####
+#  adnominal: Words that only have noun-modifying forms.
+#  e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, 
+#       どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, 
+#       「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き
+#連体詞
+#
+#####
+#  conjunction: Conjunctions that can occur independently.
+#  e.g. が, けれども, そして, じゃあ, それどころか
+接続詞
+#
+#####
+#  particle: unclassified particles.
+助詞
+#
+#  particle-case: case particles where the subclassification is undefined.
+助詞-格助詞
+#
+#  particle-case-misc: Case particles.
+#  e.g. から, が, で, と, に, へ, より, を, の, にて
+助詞-格助詞-一般
+#
+#  particle-case-quote: the "to" that appears after nouns, a person’s speech, 
+#  quotation marks, expressions of decisions from a meeting, reasons, judgements,
+#  conjectures, etc.
+#  e.g. ( だ) と (述べた.), ( である) と (して執行猶予...)
+助詞-格助詞-引用
+#
+#  particle-case-compound: Compounds of particles and verbs that mainly behave 
+#  like case particles.
+#  e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って,
+#       にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, 
+#       にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, 
+#       に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, 
+#       に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって,
+#       にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, 
+#       にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる,
+#       って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ
+助詞-格助詞-連語
+#
+#  particle-conjunctive:
+#  e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, 
+#       ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, 
+#       (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/
+助詞-接続助詞
+#
+#  particle-dependency:
+#  e.g. こそ, さえ, しか, すら, は, も, ぞ
+助詞-係助詞
+#
+#  particle-adverbial:
+#  e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, 
+#       (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/,
+#       (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, 
+#       (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/,
+#       ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」)
+助詞-副助詞
+#
+#  particle-interjective: particles with interjective grammatical roles.
+#  e.g. (松島) や
+助詞-間投助詞
+#
+#  particle-coordinate:
+#  e.g. と, たり, だの, だり, とか, なり, や, やら
+助詞-並立助詞
+#
+#  particle-final:
+#  e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, 
+#       ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/
+助詞-終助詞
+#
+#  particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is 
+#  adverbial, conjunctive, or sentence final. For example:
+#       (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」
+#       (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」
+#           「(祈りが届いたせい) か (, 試験に合格した.)」
+#       (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」
+#  e.g. か
+助詞-副助詞／並立助詞／終助詞
+#
+#  particle-adnominalizer: The "no" that attaches to nouns and modifies 
+#  non-inflectional words.
+助詞-連体化
+#
+#  particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs 
+#  that are giongo, giseigo, or gitaigo.
+#  e.g. に, と
+助詞-副詞化
+#
+#  particle-special: A particle that does not fit into one of the above classifications. 
+#  This includes particles that are used in Tanka, Haiku, and other poetry.
+#  e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家)
+助詞-特殊
+#
+#####
+#  auxiliary-verb:
+助動詞
+#
+#####
+#  interjection: Greetings and other exclamations.
+#  e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, 
+#       いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい
+#感動詞
+#
+#####
+#  symbol: unclassified Symbols.
+記号
+#
+#  symbol-misc: A general symbol not in one of the categories below.
+#  e.g. [○◎@$〒→+]
+記号-一般
+#
+#  symbol-comma: Commas
+#  e.g. [,、]
+記号-読点
+#
+#  symbol-period: Periods and full stops.
+#  e.g. [.．。]
+記号-句点
+#
+#  symbol-space: Full-width whitespace.
+記号-空白
+#
+#  symbol-open_bracket:
+#  e.g. [({‘“『【]
+記号-括弧開
+#
+#  symbol-close_bracket:
+#  e.g. [)}’”』」】]
+記号-括弧閉
+#
+#  symbol-alphabetic:
+#記号-アルファベット
+#
+#####
+#  other: unclassified other
+#その他
+#
+#  other-interjection: Words that are hard to classify as noun-suffixes or 
+#  sentence-final particles.
+#  e.g. (だ)ァ
+その他-間投
+#
+#####
+#  filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
+#  e.g. あの, うんと, えと
+フィラー
+#
+#####
+#  non-verbal: non-verbal sound.
+非言語音
+#
+#####
+#  fragment:
+#語断片
+#
+#####
+#  unknown: unknown part of speech.
+#未知語
+#
+##### End of file
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ar.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ar.txt

new file mode 100644 (file)

index 0000000..046829d
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ar.txt
@@ -0,0 +1,125 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# Cleaned on October 11, 2009 (not normalized, so use before normalization)
+# This means that when modifying this list, you might need to add some 
+# redundant entries, for example containing forms with both أ and ا
+من
+ومن
+منها
+منه
+في
+وفي
+فيها
+فيه
+و
+ف
+ثم
+او
+أو
+ب
+بها
+به
+ا
+أ
+اى
+اي
+أي
+أى
+لا
+ولا
+الا
+ألا
+إلا
+لكن
+ما
+وما
+كما
+فما
+عن
+مع
+اذا
+إذا
+ان
+أن
+إن
+انها
+أنها
+إنها
+انه
+أنه
+إنه
+بان
+بأن
+فان
+فأن
+وان
+وأن
+وإن
+التى
+التي
+الذى
+الذي
+الذين
+الى
+الي
+إلى
+إلي
+على
+عليها
+عليه
+اما
+أما
+إما
+ايضا
+أيضا
+كل
+وكل
+لم
+ولم
+لن
+ولن
+هى
+هي
+هو
+وهى
+وهي
+وهو
+فهى
+فهي
+فهو
+انت
+أنت
+لك
+لها
+له
+هذه
+هذا
+تلك
+ذلك
+هناك
+كانت
+كان
+يكون
+تكون
+وكانت
+وكان
+غير
+بعض
+قد
+نحو
+بين
+بينما
+منذ
+ضمن
+حيث
+الان
+الآن
+خلال
+بعد
+قبل
+حتى
+عند
+عندما
+لدى
+جميع
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_bg.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_bg.txt

new file mode 100644 (file)

index 0000000..1ae4ba2
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_bg.txt
@@ -0,0 +1,193 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+а
+аз
+ако
+ала
+бе
+без
+беше
+би
+бил
+била
+били
+било
+близо
+бъдат
+бъде
+бяха
+в
+вас
+ваш
+ваша
+вероятно
+вече
+взема
+ви
+вие
+винаги
+все
+всеки
+всички
+всичко
+всяка
+във
+въпреки
+върху
+г
+ги
+главно
+го
+д
+да
+дали
+до
+докато
+докога
+дори
+досега
+доста
+е
+едва
+един
+ето
+за
+зад
+заедно
+заради
+засега
+затова
+защо
+защото
+и
+из
+или
+им
+има
+имат
+иска
+й
+каза
+как
+каква
+какво
+както
+какъв
+като
+кога
+когато
+което
+които
+кой
+който
+колко
+която
+къде
+където
+към
+ли
+м
+ме
+между
+мен
+ми
+мнозина
+мога
+могат
+може
+моля
+момента
+му
+н
+на
+над
+назад
+най
+направи
+напред
+например
+нас
+не
+него
+нея
+ни
+ние
+никой
+нито
+но
+някои
+някой
+няма
+обаче
+около
+освен
+особено
+от
+отгоре
+отново
+още
+пак
+по
+повече
+повечето
+под
+поне
+поради
+после
+почти
+прави
+пред
+преди
+през
+при
+пък
+първо
+с
+са
+само
+се
+сега
+си
+скоро
+след
+сме
+според
+сред
+срещу
+сте
+съм
+със
+също
+т
+тази
+така
+такива
+такъв
+там
+твой
+те
+тези
+ти
+тн
+то
+това
+тогава
+този
+той
+толкова
+точно
+трябва
+тук
+тъй
+тя
+тях
+у
+харесва
+ч
+че
+често
+чрез
+ще
+щом
+я
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ca.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ca.txt

new file mode 100644 (file)

index 0000000..3da65de
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ca.txt
@@ -0,0 +1,220 @@
+# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
+a
+abans
+ací
+ah
+així
+això
+al
+als
+aleshores
+algun
+alguna
+algunes
+alguns
+alhora
+allà
+allí
+allò
+altra
+altre
+altres
+amb
+ambdós
+ambdues
+apa
+aquell
+aquella
+aquelles
+aquells
+aquest
+aquesta
+aquestes
+aquests
+aquí
+baix
+cada
+cadascú
+cadascuna
+cadascunes
+cadascuns
+com
+contra
+d'un
+d'una
+d'unes
+d'uns
+dalt
+de
+del
+dels
+des
+després
+dins
+dintre
+donat
+doncs
+durant
+e
+eh
+el
+els
+em
+en
+encara
+ens
+entre
+érem
+eren
+éreu
+es
+és
+esta
+està
+estàvem
+estaven
+estàveu
+esteu
+et
+etc
+ets
+fins
+fora
+gairebé
+ha
+han
+has
+havia
+he
+hem
+heu
+hi 
+ho
+i
+igual
+iguals
+ja
+l'hi
+la
+les
+li
+li'n
+llavors
+m'he
+ma
+mal
+malgrat
+mateix
+mateixa
+mateixes
+mateixos
+me
+mentre
+més
+meu
+meus
+meva
+meves
+molt
+molta
+moltes
+molts
+mon
+mons
+n'he
+n'hi
+ne
+ni
+no
+nogensmenys
+només
+nosaltres
+nostra
+nostre
+nostres
+o
+oh
+oi
+on
+pas
+pel
+pels
+per
+però
+perquè
+poc 
+poca
+pocs
+poques
+potser
+propi
+qual
+quals
+quan
+quant 
+que
+què
+quelcom
+qui
+quin
+quina
+quines
+quins
+s'ha
+s'han
+sa
+semblant
+semblants
+ses
+seu 
+seus
+seva
+seva
+seves
+si
+sobre
+sobretot
+sóc
+solament
+sols
+son 
+són
+sons 
+sota
+sou
+t'ha
+t'han
+t'he
+ta
+tal
+també
+tampoc
+tan
+tant
+tanta
+tantes
+teu
+teus
+teva
+teves
+ton
+tons
+tot
+tota
+totes
+tots
+un
+una
+unes
+uns
+us
+va
+vaig
+vam
+van
+vas
+veu
+vosaltres
+vostra
+vostre
+vostres
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_cz.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_cz.txt

new file mode 100644 (file)

index 0000000..53c6097
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_cz.txt
@@ -0,0 +1,172 @@
+a
+s
+k
+o
+i
+u
+v
+z
+dnes
+cz
+tímto
+budeš
+budem
+byli
+jseš
+můj
+svým
+ta
+tomto
+tohle
+tuto
+tyto
+jej
+zda
+proč
+máte
+tato
+kam
+tohoto
+kdo
+kteří
+mi
+nám
+tom
+tomuto
+mít
+nic
+proto
+kterou
+byla
+toho
+protože
+asi
+ho
+naši
+napište
+re
+což
+tím
+takže
+svých
+její
+svými
+jste
+aj
+tu
+tedy
+teto
+bylo
+kde
+ke
+pravé
+ji
+nad
+nejsou
+či
+pod
+téma
+mezi
+přes
+ty
+pak
+vám
+ani
+když
+však
+neg
+jsem
+tento
+článku
+články
+aby
+jsme
+před
+pta
+jejich
+byl
+ještě
+až
+bez
+také
+pouze
+první
+vaše
+která
+nás
+nový
+tipy
+pokud
+může
+strana
+jeho
+své
+jiné
+zprávy
+nové
+není
+vás
+jen
+podle
+zde
+už
+být
+více
+bude
+již
+než
+který
+by
+které
+co
+nebo
+ten
+tak
+má
+při
+od
+po
+jsou
+jak
+další
+ale
+si
+se
+ve
+to
+jako
+za
+zpět
+ze
+do
+pro
+je
+na
+atd
+atp
+jakmile
+přičemž
+já
+on
+ona
+ono
+oni
+ony
+my
+vy
+jí
+ji
+mě
+mne
+jemu
+tomu
+těm
+těmu
+němu
+němuž
+jehož
+jíž
+jelikož
+jež
+jakož
+načež
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_da.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_da.txt

new file mode 100644 (file)

index 0000000..a3ff5fe
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_da.txt
@@ -0,0 +1,108 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+
+ | A Danish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+
+og           | and
+i            | in
+jeg          | I
+det          | that (dem. pronoun)/it (pers. pronoun)
+at           | that (in front of a sentence)/to (with infinitive)
+en           | a/an
+den          | it (pers. pronoun)/that (dem. pronoun)
+til          | to/at/for/until/against/by/of/into, more
+er           | present tense of "to be"
+som          | who, as
+på           | on/upon/in/on/at/to/after/of/with/for, on
+de           | they
+med          | with/by/in, along
+han          | he
+af           | of/by/from/off/for/in/with/on, off
+for          | at/for/to/from/by/of/ago, in front/before, because
+ikke         | not
+der          | who/which, there/those
+var          | past tense of "to be"
+mig          | me/myself
+sig          | oneself/himself/herself/itself/themselves
+men          | but
+et           | a/an/one, one (number), someone/somebody/one
+har          | present tense of "to have"
+om           | round/about/for/in/a, about/around/down, if
+vi           | we
+min          | my
+havde        | past tense of "to have"
+ham          | him
+hun          | she
+nu           | now
+over         | over/above/across/by/beyond/past/on/about, over/past
+da           | then, when/as/since
+fra          | from/off/since, off, since
+du           | you
+ud           | out
+sin          | his/her/its/one's
+dem          | them
+os           | us/ourselves
+op           | up
+man          | you/one
+hans         | his
+hvor         | where
+eller        | or
+hvad         | what
+skal         | must/shall etc.
+selv         | myself/yourself/herself/ourselves etc., even
+her          | here
+alle         | all/everyone/everybody etc.
+vil          | will (verb)
+blev         | past tense of "to stay/to remain/to get/to become"
+kunne        | could
+ind          | in
+når          | when
+være         | infinitive of "to be"
+dog          | however/yet/after all
+noget        | something
+ville        | would
+jo           | you know/you see (adv), yes
+deres        | their/theirs
+efter        | after/behind/according to/for/by/from, later/afterwards
+ned          | down
+skulle       | should
+denne        | this
+end          | than
+dette        | this
+mit          | my/mine
+også         | also
+under        | under/beneath/below/during, below/underneath
+have         | have
+dig          | you
+anden        | other
+hende        | her
+mine         | my
+alt          | everything
+meget        | much/very, plenty of
+sit          | his, her, its, one's
+sine         | his, her, its, one's
+vor          | our
+mod          | against
+disse        | these
+hvis         | if
+din          | your/yours
+nogle        | some
+hos          | by/at
+blive        | be/become
+mange        | many
+ad           | by/through
+bliver       | present tense of "to be/to become"
+hendes       | her/hers
+været        | been
+thi          | for (conj)
+jer          | you
+sådan        | such, like this/like that
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_de.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_de.txt

new file mode 100644 (file)

index 0000000..f770384
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_de.txt
@@ -0,0 +1,292 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+
+ | A German stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | The number of forms in this list is reduced significantly by passing it
+ | through the German stemmer.
+
+
+aber           |  but
+
+alle           |  all
+allem
+allen
+aller
+alles
+
+als            |  than, as
+also           |  so
+am             |  an + dem
+an             |  at
+
+ander          |  other
+andere
+anderem
+anderen
+anderer
+anderes
+anderm
+andern
+anderr
+anders
+
+auch           |  also
+auf            |  on
+aus            |  out of
+bei            |  by
+bin            |  am
+bis            |  until
+bist           |  art
+da             |  there
+damit          |  with it
+dann           |  then
+
+der            |  the
+den
+des
+dem
+die
+das
+
+daß            |  that
+
+derselbe       |  the same
+derselben
+denselben
+desselben
+demselben
+dieselbe
+dieselben
+dasselbe
+
+dazu           |  to that
+
+dein           |  thy
+deine
+deinem
+deinen
+deiner
+deines
+
+denn           |  because
+
+derer          |  of those
+dessen         |  of him
+
+dich           |  thee
+dir            |  to thee
+du             |  thou
+
+dies           |  this
+diese
+diesem
+diesen
+dieser
+dieses
+
+
+doch           |  (several meanings)
+dort           |  (over) there
+
+
+durch          |  through
+
+ein            |  a
+eine
+einem
+einen
+einer
+eines
+
+einig          |  some
+einige
+einigem
+einigen
+einiger
+einiges
+
+einmal         |  once
+
+er             |  he
+ihn            |  him
+ihm            |  to him
+
+es             |  it
+etwas          |  something
+
+euer           |  your
+eure
+eurem
+euren
+eurer
+eures
+
+für            |  for
+gegen          |  towards
+gewesen        |  p.p. of sein
+hab            |  have
+habe           |  have
+haben          |  have
+hat            |  has
+hatte          |  had
+hatten         |  had
+hier           |  here
+hin            |  there
+hinter         |  behind
+
+ich            |  I
+mich           |  me
+mir            |  to me
+
+
+ihr            |  you, to her
+ihre
+ihrem
+ihren
+ihrer
+ihres
+euch           |  to you
+
+im             |  in + dem
+in             |  in
+indem          |  while
+ins            |  in + das
+ist            |  is
+
+jede           |  each, every
+jedem
+jeden
+jeder
+jedes
+
+jene           |  that
+jenem
+jenen
+jener
+jenes
+
+jetzt          |  now
+kann           |  can
+
+kein           |  no
+keine
+keinem
+keinen
+keiner
+keines
+
+können         |  can
+könnte         |  could
+machen         |  do
+man            |  one
+
+manche         |  some, many a
+manchem
+manchen
+mancher
+manches
+
+mein           |  my
+meine
+meinem
+meinen
+meiner
+meines
+
+mit            |  with
+muss           |  must
+musste         |  had to
+nach           |  to(wards)
+nicht          |  not
+nichts         |  nothing
+noch           |  still, yet
+nun            |  now
+nur            |  only
+ob             |  whether
+oder           |  or
+ohne           |  without
+sehr           |  very
+
+sein           |  his
+seine
+seinem
+seinen
+seiner
+seines
+
+selbst         |  self
+sich           |  herself
+
+sie            |  they, she
+ihnen          |  to them
+
+sind           |  are
+so             |  so
+
+solche         |  such
+solchem
+solchen
+solcher
+solches
+
+soll           |  shall
+sollte         |  should
+sondern        |  but
+sonst          |  else
+über           |  over
+um             |  about, around
+und            |  and
+
+uns            |  us
+unse
+unsem
+unsen
+unser
+unses
+
+unter          |  under
+viel           |  much
+vom            |  von + dem
+von            |  from
+vor            |  before
+während        |  while
+war            |  was
+waren          |  were
+warst          |  wast
+was            |  what
+weg            |  away, off
+weil           |  because
+weiter         |  further
+
+welche         |  which
+welchem
+welchen
+welcher
+welches
+
+wenn           |  when
+werde          |  will
+werden         |  will
+wie            |  how
+wieder         |  again
+will           |  want
+wir            |  we
+wird           |  will
+wirst          |  willst
+wo             |  where
+wollen         |  want
+wollte         |  wanted
+würde          |  would
+würden         |  would
+zu             |  to
+zum            |  zu + dem
+zur            |  zu + der
+zwar           |  indeed
+zwischen       |  between
+
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_el.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_el.txt

new file mode 100644 (file)

index 0000000..232681f
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_el.txt
@@ -0,0 +1,78 @@
+# Lucene Greek Stopwords list
+# Note: by default this file is used after GreekLowerCaseFilter,
+# so when modifying this file use 'σ' instead of 'ς' 
+ο
+η
+το
+οι
+τα
+του
+τησ
+των
+τον
+την
+και 
+κι
+κ
+ειμαι
+εισαι
+ειναι
+ειμαστε
+ειστε
+στο
+στον
+στη
+στην
+μα
+αλλα
+απο
+για
+προσ
+με
+σε
+ωσ
+παρα
+αντι
+κατα
+μετα
+θα
+να
+δε
+δεν
+μη
+μην
+επι
+ενω
+εαν
+αν
+τοτε
+που
+πωσ
+ποιοσ
+ποια
+ποιο
+ποιοι
+ποιεσ
+ποιων
+ποιουσ
+αυτοσ
+αυτη
+αυτο
+αυτοι
+αυτων
+αυτουσ
+αυτεσ
+αυτα
+εκεινοσ
+εκεινη
+εκεινο
+εκεινοι
+εκεινεσ
+εκεινα
+εκεινων
+εκεινουσ
+οπωσ
+ομωσ
+ισωσ
+οσο
+οτι
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_en.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_en.txt

new file mode 100644 (file)

index 0000000..2c164c0
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_en.txt
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# a couple of test stopwords to test that the words are really being
+# configured from this file:
+stopworda
+stopwordb
+
+# Standard english stop words taken from Lucene's StopAnalyzer
+a
+an
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+such
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_es.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_es.txt

new file mode 100644 (file)

index 0000000..2db1476
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_es.txt
@@ -0,0 +1,354 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+
+ | A Spanish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de             |  from, of
+la             |  the, her
+que            |  who, that
+el             |  the
+en             |  in
+y              |  and
+a              |  to
+los            |  the, them
+del            |  de + el
+se             |  himself, from him etc
+las            |  the, them
+por            |  for, by, etc
+un             |  a
+para           |  for
+con            |  with
+no             |  no
+una            |  a
+su             |  his, her
+al             |  a + el
+  | es         from SER
+lo             |  him
+como           |  how
+más            |  more
+pero           |  but
+sus            |  su plural
+le             |  to him, her
+ya             |  already
+o              |  or
+  | fue        from SER
+este           |  this
+  | ha         from HABER
+sí             |  himself etc
+porque         |  because
+esta           |  this
+  | son        from SER
+entre          |  between
+  | está     from ESTAR
+cuando         |  when
+muy            |  very
+sin            |  without
+sobre          |  on
+  | ser        from SER
+  | tiene      from TENER
+también        |  also
+me             |  me
+hasta          |  until
+hay            |  there is/are
+donde          |  where
+  | han        from HABER
+quien          |  whom, that
+  | están      from ESTAR
+  | estado     from ESTAR
+desde          |  from
+todo           |  all
+nos            |  us
+durante        |  during
+  | estados    from ESTAR
+todos          |  all
+uno            |  a
+les            |  to them
+ni             |  nor
+contra         |  against
+otros          |  other
+  | fueron     from SER
+ese            |  that
+eso            |  that
+  | había      from HABER
+ante           |  before
+ellos          |  they
+e              |  and (variant of y)
+esto           |  this
+mí             |  me
+antes          |  before
+algunos        |  some
+qué            |  what?
+unos           |  a
+yo             |  I
+otro           |  other
+otras          |  other
+otra           |  other
+él             |  he
+tanto          |  so much, many
+esa            |  that
+estos          |  these
+mucho          |  much, many
+quienes        |  who
+nada           |  nothing
+muchos         |  many
+cual           |  who
+  | sea        from SER
+poco           |  few
+ella           |  she
+estar          |  to be
+  | haber      from HABER
+estas          |  these
+  | estaba     from ESTAR
+  | estamos    from ESTAR
+algunas        |  some
+algo           |  something
+nosotros       |  we
+
+      | other forms
+
+mi             |  my
+mis            |  mi plural
+tú             |  thou
+te             |  thee
+ti             |  thee
+tu             |  thy
+tus            |  tu plural
+ellas          |  they
+nosotras       |  we
+vosotros       |  you
+vosotras       |  you
+os             |  you
+mío            |  mine
+mía            |
+míos           |
+mías           |
+tuyo           |  thine
+tuya           |
+tuyos          |
+tuyas          |
+suyo           |  his, hers, theirs
+suya           |
+suyos          |
+suyas          |
+nuestro        |  ours
+nuestra        |
+nuestros       |
+nuestras       |
+vuestro        |  yours
+vuestra        |
+vuestros       |
+vuestras       |
+esos           |  those
+esas           |  those
+
+               | forms of estar, to be (not including the infinitive):
+estoy
+estás
+está
+estamos
+estáis
+están
+esté
+estés
+estemos
+estéis
+estén
+estaré
+estarás
+estará
+estaremos
+estaréis
+estarán
+estaría
+estarías
+estaríamos
+estaríais
+estarían
+estaba
+estabas
+estábamos
+estabais
+estaban
+estuve
+estuviste
+estuvo
+estuvimos
+estuvisteis
+estuvieron
+estuviera
+estuvieras
+estuviéramos
+estuvierais
+estuvieran
+estuviese
+estuvieses
+estuviésemos
+estuvieseis
+estuviesen
+estando
+estado
+estada
+estados
+estadas
+estad
+
+               | forms of haber, to have (not including the infinitive):
+he
+has
+ha
+hemos
+habéis
+han
+haya
+hayas
+hayamos
+hayáis
+hayan
+habré
+habrás
+habrá
+habremos
+habréis
+habrán
+habría
+habrías
+habríamos
+habríais
+habrían
+había
+habías
+habíamos
+habíais
+habían
+hube
+hubiste
+hubo
+hubimos
+hubisteis
+hubieron
+hubiera
+hubieras
+hubiéramos
+hubierais
+hubieran
+hubiese
+hubieses
+hubiésemos
+hubieseis
+hubiesen
+habiendo
+habido
+habida
+habidos
+habidas
+
+               | forms of ser, to be (not including the infinitive):
+soy
+eres
+es
+somos
+sois
+son
+sea
+seas
+seamos
+seáis
+sean
+seré
+serás
+será
+seremos
+seréis
+serán
+sería
+serías
+seríamos
+seríais
+serían
+era
+eras
+éramos
+erais
+eran
+fui
+fuiste
+fue
+fuimos
+fuisteis
+fueron
+fuera
+fueras
+fuéramos
+fuerais
+fueran
+fuese
+fueses
+fuésemos
+fueseis
+fuesen
+siendo
+sido
+  |  sed also means 'thirst'
+
+               | forms of tener, to have (not including the infinitive):
+tengo
+tienes
+tiene
+tenemos
+tenéis
+tienen
+tenga
+tengas
+tengamos
+tengáis
+tengan
+tendré
+tendrás
+tendrá
+tendremos
+tendréis
+tendrán
+tendría
+tendrías
+tendríamos
+tendríais
+tendrían
+tenía
+tenías
+teníamos
+teníais
+tenían
+tuve
+tuviste
+tuvo
+tuvimos
+tuvisteis
+tuvieron
+tuviera
+tuvieras
+tuviéramos
+tuvierais
+tuvieran
+tuviese
+tuvieses
+tuviésemos
+tuvieseis
+tuviesen
+teniendo
+tenido
+tenida
+tenidos
+tenidas
+tened
+
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_eu.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_eu.txt

new file mode 100644 (file)

index 0000000..25f1db9
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_eu.txt
@@ -0,0 +1,99 @@
+# example set of basque stopwords
+al
+anitz
+arabera
+asko
+baina
+bat
+batean
+batek
+bati
+batzuei
+batzuek
+batzuetan
+batzuk
+bera
+beraiek
+berau
+berauek
+bere
+berori
+beroriek
+beste
+bezala
+da
+dago
+dira
+ditu
+du
+dute
+edo
+egin
+ere
+eta
+eurak
+ez
+gainera
+gu
+gutxi
+guzti
+haiei
+haiek
+haietan
+hainbeste
+hala
+han
+handik
+hango
+hara
+hari
+hark
+hartan
+hau
+hauei
+hauek
+hauetan
+hemen
+hemendik
+hemengo
+hi
+hona
+honek
+honela
+honetan
+honi
+hor
+hori
+horiei
+horiek
+horietan
+horko
+horra
+horrek
+horrela
+horretan
+horri
+hortik
+hura
+izan
+ni
+noiz
+nola
+non
+nondik
+nongo
+nor
+nora
+ze
+zein
+zen
+zenbait
+zenbat
+zer
+zergatik
+ziren
+zituen
+zu
+zuek
+zuen
+zuten
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_fa.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_fa.txt

new file mode 100644 (file)

index 0000000..723641c
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_fa.txt
@@ -0,0 +1,313 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# Note: by default this file is used after normalization, so when adding entries
+# to this file, use the arabic 'ي' instead of 'ی'
+انان
+نداشته
+سراسر
+خياه
+ايشان
+وي
+تاكنون
+بيشتري
+دوم
+پس
+ناشي
+وگو
+يا
+داشتند
+سپس
+هنگام
+هرگز
+پنج
+نشان
+امسال
+ديگر
+گروهي
+شدند
+چطور
+ده
+و
+دو
+نخستين
+ولي
+چرا
+چه
+وسط
+ه
+كدام
+قابل
+يك
+رفت
+هفت
+همچنين
+در
+هزار
+بله
+بلي
+شايد
+اما
+شناسي
+گرفته
+دهد
+داشته
+دانست
+داشتن
+خواهيم
+ميليارد
+وقتيكه
+امد
+خواهد
+جز
+اورده
+شده
+بلكه
+خدمات
+شدن
+برخي
+نبود
+بسياري
+جلوگيري
+حق
+كردند
+نوعي
+بعري
+نكرده
+نظير
+نبايد
+بوده
+بودن
+داد
+اورد
+هست
+جايي
+شود
+دنبال
+داده
+بايد
+سابق
+هيچ
+همان
+انجا
+كمتر
+كجاست
+گردد
+كسي
+تر
+مردم
+تان
+دادن
+بودند
+سري
+جدا
+ندارند
+مگر
+يكديگر
+دارد
+دهند
+بنابراين
+هنگامي
+سمت
+جا
+انچه
+خود
+دادند
+زياد
+دارند
+اثر
+بدون
+بهترين
+بيشتر
+البته
+به
+براساس
+بيرون
+كرد
+بعضي
+گرفت
+توي
+اي
+ميليون
+او
+جريان
+تول
+بر
+مانند
+برابر
+باشيم
+مدتي
+گويند
+اكنون
+تا
+تنها
+جديد
+چند
+بي
+نشده
+كردن
+كردم
+گويد
+كرده
+كنيم
+نمي
+نزد
+روي
+قصد
+فقط
+بالاي
+ديگران
+اين
+ديروز
+توسط
+سوم
+ايم
+دانند
+سوي
+استفاده
+شما
+كنار
+داريم
+ساخته
+طور
+امده
+رفته
+نخست
+بيست
+نزديك
+طي
+كنيد
+از
+انها
+تمامي
+داشت
+يكي
+طريق
+اش
+چيست
+روب
+نمايد
+گفت
+چندين
+چيزي
+تواند
+ام
+ايا
+با
+ان
+ايد
+ترين
+اينكه
+ديگري
+راه
+هايي
+بروز
+همچنان
+پاعين
+كس
+حدود
+مختلف
+مقابل
+چيز
+گيرد
+ندارد
+ضد
+همچون
+سازي
+شان
+مورد
+باره
+مرسي
+خويش
+برخوردار
+چون
+خارج
+شش
+هنوز
+تحت
+ضمن
+هستيم
+گفته
+فكر
+بسيار
+پيش
+براي
+روزهاي
+انكه
+نخواهد
+بالا
+كل
+وقتي
+كي
+چنين
+كه
+گيري
+نيست
+است
+كجا
+كند
+نيز
+يابد
+بندي
+حتي
+توانند
+عقب
+خواست
+كنند
+بين
+تمام
+همه
+ما
+باشند
+مثل
+شد
+اري
+باشد
+اره
+طبق
+بعد
+اگر
+صورت
+غير
+جاي
+بيش
+ريزي
+اند
+زيرا
+چگونه
+بار
+لطفا
+مي
+درباره
+من
+ديده
+همين
+گذاري
+برداري
+علت
+گذاشته
+هم
+فوق
+نه
+ها
+شوند
+اباد
+همواره
+هر
+اول
+خواهند
+چهار
+نام
+امروز
+مان
+هاي
+قبل
+كنم
+سعي
+تازه
+را
+هستند
+زير
+جلوي
+عنوان
+بود
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_fi.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_fi.txt

new file mode 100644 (file)

index 0000000..addad79
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_fi.txt
@@ -0,0 +1,95 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+ 
+| forms of BE
+
+olla
+olen
+olet
+on
+olemme
+olette
+ovat
+ole        | negative form
+
+oli
+olisi
+olisit
+olisin
+olisimme
+olisitte
+olisivat
+olit
+olin
+olimme
+olitte
+olivat
+ollut
+olleet
+
+en         | negation
+et
+ei
+emme
+ette
+eivät
+
+|Nom   Gen    Acc    Part   Iness   Elat    Illat  Adess   Ablat   Allat   Ess    Trans
+minä   minun  minut  minua  minussa minusta minuun minulla minulta minulle               | I
+sinä   sinun  sinut  sinua  sinussa sinusta sinuun sinulla sinulta sinulle               | you
+hän    hänen  hänet  häntä  hänessä hänestä häneen hänellä häneltä hänelle               | he she
+me     meidän meidät meitä  meissä  meistä  meihin meillä  meiltä  meille                | we
+te     teidän teidät teitä  teissä  teistä  teihin teillä  teiltä  teille                | you
+he     heidän heidät heitä  heissä  heistä  heihin heillä  heiltä  heille                | they
+
+tämä   tämän         tätä   tässä   tästä   tähän  tallä   tältä   tälle   tänä   täksi  | this
+tuo    tuon          tuotä  tuossa  tuosta  tuohon tuolla  tuolta  tuolle  tuona  tuoksi | that
+se     sen           sitä   siinä   siitä   siihen sillä   siltä   sille   sinä   siksi  | it
+nämä   näiden        näitä  näissä  näistä  näihin näillä  näiltä  näille  näinä  näiksi | these
+nuo    noiden        noita  noissa  noista  noihin noilla  noilta  noille  noina  noiksi | those
+ne     niiden        niitä  niissä  niistä  niihin niillä  niiltä  niille  niinä  niiksi | they
+
+kuka   kenen kenet   ketä   kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
+ketkä  keiden ketkä  keitä  keissä  keistä  keihin keillä  keiltä  keille  keinä  keiksi | (pl)
+mikä   minkä minkä   mitä   missä   mistä   mihin  millä   miltä   mille   minä   miksi  | which what
+mitkä                                                                                    | (pl)
+
+joka   jonka         jota   jossa   josta   johon  jolla   jolta   jolle   jona   joksi  | who which
+jotka  joiden        joita  joissa  joista  joihin joilla  joilta  joille  joina  joiksi | (pl)
+
+| conjunctions
+
+että   | that
+ja     | and
+jos    | if
+koska  | because
+kuin   | than
+mutta  | but
+niin   | so
+sekä   | and
+sillä  | for
+tai    | or
+vaan   | but
+vai    | or
+vaikka | although
+
+
+| prepositions
+
+kanssa  | with
+mukaan  | according to
+noin    | about
+poikki  | across
+yli     | over, across
+
+| other
+
+kun    | when
+niin   | so
+nyt    | now
+itse   | self
+
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_fr.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_fr.txt

new file mode 100644 (file)

index 0000000..20d12cb
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_fr.txt
@@ -0,0 +1,184 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+
+ | A French stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+au             |  a + le
+aux            |  a + les
+avec           |  with
+ce             |  this
+ces            |  these
+dans           |  in
+de             |  of
+des            |  de + les
+du             |  de + le
+elle           |  she
+en             |  `of them' etc
+et             |  and
+eux            |  them
+il             |  he
+je             |  I
+la             |  the
+le             |  the
+leur           |  their
+lui            |  him
+ma             |  my (fem)
+mais           |  but
+me             |  me
+même           |  same; as in moi-même (myself) etc
+mes            |  my (pl)
+moi            |  me
+mon            |  my (masc)
+ne             |  not
+nos            |  our (pl)
+notre          |  our
+nous           |  we
+on             |  one
+ou             |  or
+par            |  by
+pas            |  not
+pour           |  for
+qu             |  que before vowel
+que            |  that
+qui            |  who
+sa             |  his, her (fem)
+se             |  oneself
+ses            |  his (pl)
+son            |  his, her (masc)
+sur            |  on
+ta             |  thy (fem)
+te             |  thee
+tes            |  thy (pl)
+toi            |  thee
+ton            |  thy (masc)
+tu             |  thou
+un             |  a
+une            |  a
+vos            |  your (pl)
+votre          |  your
+vous           |  you
+
+               |  single letter forms
+
+c              |  c'
+d              |  d'
+j              |  j'
+l              |  l'
+à              |  to, at
+m              |  m'
+n              |  n'
+s              |  s'
+t              |  t'
+y              |  there
+
+               | forms of être (not including the infinitive):
+été
+étée
+étées
+étés
+étant
+suis
+es
+est
+sommes
+êtes
+sont
+serai
+seras
+sera
+serons
+serez
+seront
+serais
+serait
+serions
+seriez
+seraient
+étais
+était
+étions
+étiez
+étaient
+fus
+fut
+fûmes
+fûtes
+furent
+sois
+soit
+soyons
+soyez
+soient
+fusse
+fusses
+fût
+fussions
+fussiez
+fussent
+
+               | forms of avoir (not including the infinitive):
+ayant
+eu
+eue
+eues
+eus
+ai
+as
+avons
+avez
+ont
+aurai
+auras
+aura
+aurons
+aurez
+auront
+aurais
+aurait
+aurions
+auriez
+auraient
+avais
+avait
+avions
+aviez
+avaient
+eut
+eûmes
+eûtes
+eurent
+aie
+aies
+ait
+ayons
+ayez
+aient
+eusse
+eusses
+eût
+eussions
+eussiez
+eussent
+
+               | Later additions (from Jean-Christophe Deschamps)
+ceci           |  this
+cela           |  that
+celà           |  that
+cet            |  this
+cette          |  this
+ici            |  here
+ils            |  they
+les            |  the (pl)
+leurs          |  their (pl)
+quel           |  which
+quels          |  which
+quelle         |  which
+quelles        |  which
+sans           |  without
+soi            |  oneself
+
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ga.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ga.txt

new file mode 100644 (file)

index 0000000..9ff88d7
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ga.txt
@@ -0,0 +1,110 @@
+
+a
+ach
+ag
+agus
+an
+aon
+ar
+arna
+as
+b'
+ba
+beirt
+bhúr
+caoga
+ceathair
+ceathrar
+chomh
+chtó
+chuig
+chun
+cois
+céad
+cúig
+cúigear
+d'
+daichead
+dar
+de
+deich
+deichniúr
+den
+dhá
+do
+don
+dtí
+dá
+dár
+dó
+faoi
+faoin
+faoina
+faoinár
+fara
+fiche
+gach
+gan
+go
+gur
+haon
+hocht
+i
+iad
+idir
+in
+ina
+ins
+inár
+is
+le
+leis
+lena
+lenár
+m'
+mar
+mo
+mé
+na
+nach
+naoi
+naonúr
+ná
+ní
+níor
+nó
+nócha
+ocht
+ochtar
+os
+roimh
+sa
+seacht
+seachtar
+seachtó
+seasca
+seisear
+siad
+sibh
+sinn
+sna
+sé
+sí
+tar
+thar
+thú
+triúr
+trí
+trína
+trínár
+tríocha
+tú
+um
+ár
+é
+éis
+í
+ó
+ón
+óna
+ónár
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_gl.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_gl.txt

new file mode 100644 (file)

index 0000000..d8760b1
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_gl.txt
@@ -0,0 +1,161 @@
+# Galician stopwords
+a
+aínda
+alí
+aquel
+aquela
+aquelas
+aqueles
+aquilo
+aquí
+ao
+aos
+as
+así
+á
+ben
+cando
+che
+co
+coa
+comigo
+con
+connosco
+contigo
+convosco
+coas
+cos
+cun
+cuns
+cunha
+cunhas
+da
+dalgunha
+dalgunhas
+dalgún
+dalgúns
+das
+de
+del
+dela
+delas
+deles
+desde
+deste
+do
+dos
+dun
+duns
+dunha
+dunhas
+e
+el
+ela
+elas
+eles
+en
+era
+eran
+esa
+esas
+ese
+eses
+esta
+estar
+estaba
+está
+están
+este
+estes
+estiven
+estou
+eu
+é
+facer
+foi
+foron
+fun
+había
+hai
+iso
+isto
+la
+las
+lle
+lles
+lo
+los
+mais
+me
+meu
+meus
+min
+miña
+miñas
+moi
+na
+nas
+neste
+nin
+no
+non
+nos
+nosa
+nosas
+noso
+nosos
+nós
+nun
+nunha
+nuns
+nunhas
+o
+os
+ou
+ó
+ós
+para
+pero
+pode
+pois
+pola
+polas
+polo
+polos
+por
+que
+se
+senón
+ser
+seu
+seus
+sexa
+sido
+sobre
+súa
+súas
+tamén
+tan
+te
+ten
+teñen
+teño
+ter
+teu
+teus
+ti
+tido
+tiña
+tiven
+túa
+túas
+un
+unha
+unhas
+uns
+vos
+vosa
+vosas
+voso
+vosos
+vós
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_hi.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_hi.txt

new file mode 100644 (file)

index 0000000..86286bb
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_hi.txt
@@ -0,0 +1,235 @@
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# Note: by default this file also contains forms normalized by HindiNormalizer 
+# for spelling variation (see section below), such that it can be used whether or 
+# not you enable that feature. When adding additional entries to this list,
+# please add the normalized form as well. 
+अंदर
+अत
+अपना
+अपनी
+अपने
+अभी
+आदि
+आप
+इत्यादि
+इन 
+इनका
+इन्हीं
+इन्हें
+इन्हों
+इस
+इसका
+इसकी
+इसके
+इसमें
+इसी
+इसे
+उन
+उनका
+उनकी
+उनके
+उनको
+उन्हीं
+उन्हें
+उन्हों
+उस
+उसके
+उसी
+उसे
+एक
+एवं
+एस
+ऐसे
+और
+कई
+कर
+करता
+करते
+करना
+करने
+करें
+कहते
+कहा
+का
+काफ़ी
+कि
+कितना
+किन्हें
+किन्हों
+किया
+किर
+किस
+किसी
+किसे
+की
+कुछ
+कुल
+के
+को
+कोई
+कौन
+कौनसा
+गया
+घर
+जब
+जहाँ
+जा
+जितना
+जिन
+जिन्हें
+जिन्हों
+जिस
+जिसे
+जीधर
+जैसा
+जैसे
+जो
+तक
+तब
+तरह
+तिन
+तिन्हें
+तिन्हों
+तिस
+तिसे
+तो
+था
+थी
+थे
+दबारा
+दिया
+दुसरा
+दूसरे
+दो
+द्वारा
+न
+नहीं
+ना
+निहायत
+नीचे
+ने
+पर
+पर  
+पहले
+पूरा
+पे
+फिर
+बनी
+बही
+बहुत
+बाद
+बाला
+बिलकुल
+भी
+भीतर
+मगर
+मानो
+मे
+में
+यदि
+यह
+यहाँ
+यही
+या
+यिह 
+ये
+रखें
+रहा
+रहे
+ऱ्वासा
+लिए
+लिये
+लेकिन
+व
+वर्ग
+वह
+वह 
+वहाँ
+वहीं
+वाले
+वुह 
+वे
+वग़ैरह
+संग
+सकता
+सकते
+सबसे
+सभी
+साथ
+साबुत
+साभ
+सारा
+से
+सो
+ही
+हुआ
+हुई
+हुए
+है
+हैं
+हो
+होता
+होती
+होते
+होना
+होने
+# additional normalized forms of the above
+अपनि
+जेसे
+होति
+सभि
+तिंहों
+इंहों
+दवारा
+इसि
+किंहें
+थि
+उंहों
+ओर
+जिंहें
+वहिं
+अभि
+बनि
+हि
+उंहिं
+उंहें
+हें
+वगेरह
+एसे
+रवासा
+कोन
+निचे
+काफि
+उसि
+पुरा
+भितर
+हे
+बहि
+वहां
+कोइ
+यहां
+जिंहों
+तिंहें
+किसि
+कइ
+यहि
+इंहिं
+जिधर
+इंहें
+अदि
+इतयादि
+हुइ
+कोनसा
+इसकि
+दुसरे
+जहां
+अप
+किंहों
+उनकि
+भि
+वरग
+हुअ
+जेसा
+नहिं
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_hu.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_hu.txt

new file mode 100644 (file)

index 0000000..1a96f1d
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_hu.txt
@@ -0,0 +1,209 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+ 
+| Hungarian stop word list
+| prepared by Anna Tordai
+
+a
+ahogy
+ahol
+aki
+akik
+akkor
+alatt
+által
+általában
+amely
+amelyek
+amelyekben
+amelyeket
+amelyet
+amelynek
+ami
+amit
+amolyan
+amíg
+amikor
+át
+abban
+ahhoz
+annak
+arra
+arról
+az
+azok
+azon
+azt
+azzal
+azért
+aztán
+azután
+azonban
+bár
+be
+belül
+benne
+cikk
+cikkek
+cikkeket
+csak
+de
+e
+eddig
+egész
+egy
+egyes
+egyetlen
+egyéb
+egyik
+egyre
+ekkor
+el
+elég
+ellen
+elő
+először
+előtt
+első
+én
+éppen
+ebben
+ehhez
+emilyen
+ennek
+erre
+ez
+ezt
+ezek
+ezen
+ezzel
+ezért
+és
+fel
+felé
+hanem
+hiszen
+hogy
+hogyan
+igen
+így
+illetve
+ill.
+ill
+ilyen
+ilyenkor
+ison
+ismét
+itt
+jó
+jól
+jobban
+kell
+kellett
+keresztül
+keressünk
+ki
+kívül
+között
+közül
+legalább
+lehet
+lehetett
+legyen
+lenne
+lenni
+lesz
+lett
+maga
+magát
+majd
+majd
+már
+más
+másik
+meg
+még
+mellett
+mert
+mely
+melyek
+mi
+mit
+míg
+miért
+milyen
+mikor
+minden
+mindent
+mindenki
+mindig
+mint
+mintha
+mivel
+most
+nagy
+nagyobb
+nagyon
+ne
+néha
+nekem
+neki
+nem
+néhány
+nélkül
+nincs
+olyan
+ott
+össze
+ő
+ők
+őket
+pedig
+persze
+rá
+s
+saját
+sem
+semmi
+sok
+sokat
+sokkal
+számára
+szemben
+szerint
+szinte
+talán
+tehát
+teljes
+tovább
+továbbá
+több
+úgy
+ugyanis
+új
+újabb
+újra
+után
+utána
+utolsó
+vagy
+vagyis
+valaki
+valami
+valamint
+való
+vagyok
+van
+vannak
+volt
+voltam
+voltak
+voltunk
+vissza
+vele
+viszont
+volna
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_hy.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_hy.txt

new file mode 100644 (file)

index 0000000..60c1c50
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_hy.txt
@@ -0,0 +1,46 @@
+# example set of Armenian stopwords.
+այդ
+այլ
+այն
+այս
+դու
+դուք
+եմ
+են
+ենք
+ես
+եք
+է
+էի
+էին
+էինք
+էիր
+էիք
+էր
+ըստ
+թ
+ի
+ին
+իսկ
+իր
+կամ
+համար
+հետ
+հետո
+մենք
+մեջ
+մի
+ն
+նա
+նաև
+նրա
+նրանք
+որ
+որը
+որոնք
+որպես
+ու
+ում
+պիտի
+վրա
+և
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_id.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_id.txt

new file mode 100644 (file)

index 0000000..4617f83
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_id.txt
@@ -0,0 +1,359 @@
+# from appendix D of: A Study of Stemming Effects on Information
+# Retrieval in Bahasa Indonesia
+ada
+adanya
+adalah
+adapun
+agak
+agaknya
+agar
+akan
+akankah
+akhirnya
+aku
+akulah
+amat
+amatlah
+anda
+andalah
+antar
+diantaranya
+antara
+antaranya
+diantara
+apa
+apaan
+mengapa
+apabila
+apakah
+apalagi
+apatah
+atau
+ataukah
+ataupun
+bagai
+bagaikan
+sebagai
+sebagainya
+bagaimana
+bagaimanapun
+sebagaimana
+bagaimanakah
+bagi
+bahkan
+bahwa
+bahwasanya
+sebaliknya
+banyak
+sebanyak
+beberapa
+seberapa
+begini
+beginian
+beginikah
+beginilah
+sebegini
+begitu
+begitukah
+begitulah
+begitupun
+sebegitu
+belum
+belumlah
+sebelum
+sebelumnya
+sebenarnya
+berapa
+berapakah
+berapalah
+berapapun
+betulkah
+sebetulnya
+biasa
+biasanya
+bila
+bilakah
+bisa
+bisakah
+sebisanya
+boleh
+bolehkah
+bolehlah
+buat
+bukan
+bukankah
+bukanlah
+bukannya
+cuma
+percuma
+dahulu
+dalam
+dan
+dapat
+dari
+daripada
+dekat
+demi
+demikian
+demikianlah
+sedemikian
+dengan
+depan
+di
+dia
+dialah
+dini
+diri
+dirinya
+terdiri
+dong
+dulu
+enggak
+enggaknya
+entah
+entahlah
+terhadap
+terhadapnya
+hal
+hampir
+hanya
+hanyalah
+harus
+haruslah
+harusnya
+seharusnya
+hendak
+hendaklah
+hendaknya
+hingga
+sehingga
+ia
+ialah
+ibarat
+ingin
+inginkah
+inginkan
+ini
+inikah
+inilah
+itu
+itukah
+itulah
+jangan
+jangankan
+janganlah
+jika
+jikalau
+juga
+justru
+kala
+kalau
+kalaulah
+kalaupun
+kalian
+kami
+kamilah
+kamu
+kamulah
+kan
+kapan
+kapankah
+kapanpun
+dikarenakan
+karena
+karenanya
+ke
+kecil
+kemudian
+kenapa
+kepada
+kepadanya
+ketika
+seketika
+khususnya
+kini
+kinilah
+kiranya
+sekiranya
+kita
+kitalah
+kok
+lagi
+lagian
+selagi
+lah
+lain
+lainnya
+melainkan
+selaku
+lalu
+melalui
+terlalu
+lama
+lamanya
+selama
+selama
+selamanya
+lebih
+terlebih
+bermacam
+macam
+semacam
+maka
+makanya
+makin
+malah
+malahan
+mampu
+mampukah
+mana
+manakala
+manalagi
+masih
+masihkah
+semasih
+masing
+mau
+maupun
+semaunya
+memang
+mereka
+merekalah
+meski
+meskipun
+semula
+mungkin
+mungkinkah
+nah
+namun
+nanti
+nantinya
+nyaris
+oleh
+olehnya
+seorang
+seseorang
+pada
+padanya
+padahal
+paling
+sepanjang
+pantas
+sepantasnya
+sepantasnyalah
+para
+pasti
+pastilah
+per
+pernah
+pula
+pun
+merupakan
+rupanya
+serupa
+saat
+saatnya
+sesaat
+saja
+sajalah
+saling
+bersama
+sama
+sesama
+sambil
+sampai
+sana
+sangat
+sangatlah
+saya
+sayalah
+se
+sebab
+sebabnya
+sebuah
+tersebut
+tersebutlah
+sedang
+sedangkan
+sedikit
+sedikitnya
+segala
+segalanya
+segera
+sesegera
+sejak
+sejenak
+sekali
+sekalian
+sekalipun
+sesekali
+sekaligus
+sekarang
+sekarang
+sekitar
+sekitarnya
+sela
+selain
+selalu
+seluruh
+seluruhnya
+semakin
+sementara
+sempat
+semua
+semuanya
+sendiri
+sendirinya
+seolah
+seperti
+sepertinya
+sering
+seringnya
+serta
+siapa
+siapakah
+siapapun
+disini
+disinilah
+sini
+sinilah
+sesuatu
+sesuatunya
+suatu
+sesudah
+sesudahnya
+sudah
+sudahkah
+sudahlah
+supaya
+tadi
+tadinya
+tak
+tanpa
+setelah
+telah
+tentang
+tentu
+tentulah
+tentunya
+tertentu
+seterusnya
+tapi
+tetapi
+setiap
+tiap
+setidaknya
+tidak
+tidakkah
+tidaklah
+toh
+waduh
+wah
+wahai
+sewaktu
+walau
+walaupun
+wong
+yaitu
+yakni
+yang
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_it.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_it.txt

new file mode 100644 (file)

index 0000000..4cb5b08
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_it.txt
@@ -0,0 +1,301 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+
+ | An Italian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ad             |  a (to) before vowel
+al             |  a + il
+allo           |  a + lo
+ai             |  a + i
+agli           |  a + gli
+all            |  a + l'
+agl            |  a + gl'
+alla           |  a + la
+alle           |  a + le
+con            |  with
+col            |  con + il
+coi            |  con + i (forms collo, cogli etc are now very rare)
+da             |  from
+dal            |  da + il
+dallo          |  da + lo
+dai            |  da + i
+dagli          |  da + gli
+dall           |  da + l'
+dagl           |  da + gl'
+dalla          |  da + la
+dalle          |  da + le
+di             |  of
+del            |  di + il
+dello          |  di + lo
+dei            |  di + i
+degli          |  di + gli
+dell           |  di + l'
+degl           |  di + gl'
+della          |  di + la
+delle          |  di + le
+in             |  in
+nel            |  in + il
+nello          |  in + lo
+nei            |  in + i
+negli          |  in + gli
+nell           |  in + l'
+negl           |  in + gl'
+nella          |  in + la
+nelle          |  in + le
+su             |  on
+sul            |  su + il
+sullo          |  su + lo
+sui            |  su + i
+sugli          |  su + gli
+sull           |  su + l'
+sugl           |  su + gl'
+sulla          |  su + la
+sulle          |  su + le
+per            |  through, by
+tra            |  among
+contro         |  against
+io             |  I
+tu             |  thou
+lui            |  he
+lei            |  she
+noi            |  we
+voi            |  you
+loro           |  they
+mio            |  my
+mia            |
+miei           |
+mie            |
+tuo            |
+tua            |
+tuoi           |  thy
+tue            |
+suo            |
+sua            |
+suoi           |  his, her
+sue            |
+nostro         |  our
+nostra         |
+nostri         |
+nostre         |
+vostro         |  your
+vostra         |
+vostri         |
+vostre         |
+mi             |  me
+ti             |  thee
+ci             |  us, there
+vi             |  you, there
+lo             |  him, the
+la             |  her, the
+li             |  them
+le             |  them, the
+gli            |  to him, the
+ne             |  from there etc
+il             |  the
+un             |  a
+uno            |  a
+una            |  a
+ma             |  but
+ed             |  and
+se             |  if
+perché         |  why, because
+anche          |  also
+come           |  how
+dov            |  where (as dov')
+dove           |  where
+che            |  who, that
+chi            |  who
+cui            |  whom
+non            |  not
+più            |  more
+quale          |  who, that
+quanto         |  how much
+quanti         |
+quanta         |
+quante         |
+quello         |  that
+quelli         |
+quella         |
+quelle         |
+questo         |  this
+questi         |
+questa         |
+queste         |
+si             |  yes
+tutto          |  all
+tutti          |  all
+
+               |  single letter forms:
+
+a              |  at
+c              |  as c' for ce or ci
+e              |  and
+i              |  the
+l              |  as l'
+o              |  or
+
+               | forms of avere, to have (not including the infinitive):
+
+ho
+hai
+ha
+abbiamo
+avete
+hanno
+abbia
+abbiate
+abbiano
+avrò
+avrai
+avrà
+avremo
+avrete
+avranno
+avrei
+avresti
+avrebbe
+avremmo
+avreste
+avrebbero
+avevo
+avevi
+aveva
+avevamo
+avevate
+avevano
+ebbi
+avesti
+ebbe
+avemmo
+aveste
+ebbero
+avessi
+avesse
+avessimo
+avessero
+avendo
+avuto
+avuta
+avuti
+avute
+
+               | forms of essere, to be (not including the infinitive):
+sono
+sei
+è
+siamo
+siete
+sia
+siate
+siano
+sarò
+sarai
+sarà
+saremo
+sarete
+saranno
+sarei
+saresti
+sarebbe
+saremmo
+sareste
+sarebbero
+ero
+eri
+era
+eravamo
+eravate
+erano
+fui
+fosti
+fu
+fummo
+foste
+furono
+fossi
+fosse
+fossimo
+fossero
+essendo
+
+               | forms of fare, to do (not including the infinitive, fa, fat-):
+faccio
+fai
+facciamo
+fanno
+faccia
+facciate
+facciano
+farò
+farai
+farà
+faremo
+farete
+faranno
+farei
+faresti
+farebbe
+faremmo
+fareste
+farebbero
+facevo
+facevi
+faceva
+facevamo
+facevate
+facevano
+feci
+facesti
+fece
+facemmo
+faceste
+fecero
+facessi
+facesse
+facessimo
+facessero
+facendo
+
+               | forms of stare, to be (not including the infinitive):
+sto
+stai
+sta
+stiamo
+stanno
+stia
+stiate
+stiano
+starò
+starai
+starà
+staremo
+starete
+staranno
+starei
+staresti
+starebbe
+staremmo
+stareste
+starebbero
+stavo
+stavi
+stava
+stavamo
+stavate
+stavano
+stetti
+stesti
+stette
+stemmo
+steste
+stettero
+stessi
+stesse
+stessimo
+stessero
+stando
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ja.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ja.txt

new file mode 100644 (file)

index 0000000..d4321be
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ja.txt
@@ -0,0 +1,127 @@
+#
+# This file defines a stopword set for Japanese.
+#
+# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
+# Punctuation characters and frequent kanji have mostly been left out.  See LUCENE-3745
+# for frequency lists, etc. that can be useful for making your own set (if desired)
+#
+# Note that there is an overlap between these stopwords and the terms stopped when used
+# in combination with the JapanesePartOfSpeechStopFilter.  When editing this file, note
+# that comments are not allowed on the same line as stopwords.
+#
+# Also note that stopping is done in a case-insensitive manner.  Change your StopFilter
+# configuration if you need case-sensitive stopping.  Lastly, note that stopping is done
+# using the same character width as the entries in this file.  Since this StopFilter is
+# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
+# entries to be in half-width and your kana entries to be in full-width.
+#
+の
+に
+は
+を
+た
+が
+で
+て
+と
+し
+れ
+さ
+ある
+いる
+も
+する
+から
+な
+こと
+として
+い
+や
+れる
+など
+なっ
+ない
+この
+ため
+その
+あっ
+よう
+また
+もの
+という
+あり
+まで
+られ
+なる
+へ
+か
+だ
+これ
+によって
+により
+おり
+より
+による
+ず
+なり
+られる
+において
+ば
+なかっ
+なく
+しかし
+について
+せ
+だっ
+その後
+できる
+それ
+う
+ので
+なお
+のみ
+でき
+き
+つ
+における
+および
+いう
+さらに
+でも
+ら
+たり
+その他
+に関する
+たち
+ます
+ん
+なら
+に対して
+特に
+せる
+及び
+これら
+とき
+では
+にて
+ほか
+ながら
+うち
+そして
+とともに
+ただし
+かつて
+それぞれ
+または
+お
+ほど
+ものの
+に対する
+ほとんど
+と共に
+といった
+です
+とも
+ところ
+ここ
+##### End of file
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_lv.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_lv.txt

new file mode 100644 (file)

index 0000000..e21a23c
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_lv.txt
@@ -0,0 +1,172 @@
+# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
+# the original list of over 800 forms was refined: 
+#   pronouns, adverbs, interjections were removed
+# 
+# prepositions
+aiz
+ap
+ar
+apakš
+ārpus
+augšpus
+bez
+caur
+dēļ
+gar
+iekš
+iz
+kopš
+labad
+lejpus
+līdz
+no
+otrpus
+pa
+par
+pār
+pēc
+pie
+pirms
+pret
+priekš
+starp
+šaipus
+uz
+viņpus
+virs
+virspus
+zem
+apakšpus
+# Conjunctions
+un
+bet
+jo
+ja
+ka
+lai
+tomēr
+tikko
+turpretī
+arī
+kaut
+gan
+tādēļ
+tā
+ne
+tikvien
+vien
+kā
+ir
+te
+vai
+kamēr
+# Particles
+ar
+diezin
+droši
+diemžēl
+nebūt
+ik
+it
+taču
+nu
+pat
+tiklab
+iekšpus
+nedz
+tik
+nevis
+turpretim
+jeb
+iekam
+iekām
+iekāms
+kolīdz
+līdzko
+tiklīdz
+jebšu
+tālab
+tāpēc
+nekā
+itin
+jā
+jau
+jel
+nē
+nezin
+tad
+tikai
+vis
+tak
+iekams
+vien
+# modal verbs
+būt  
+biju 
+biji
+bija
+bijām
+bijāt
+esmu
+esi
+esam
+esat 
+būšu     
+būsi
+būs
+būsim
+būsiet
+tikt
+tiku
+tiki
+tika
+tikām
+tikāt
+tieku
+tiec
+tiek
+tiekam
+tiekat
+tikšu
+tiks
+tiksim
+tiksiet
+tapt
+tapi
+tapāt
+topat
+tapšu
+tapsi
+taps
+tapsim
+tapsiet
+kļūt
+kļuvu
+kļuvi
+kļuva
+kļuvām
+kļuvāt
+kļūstu
+kļūsti
+kļūst
+kļūstam
+kļūstat
+kļūšu
+kļūsi
+kļūs
+kļūsim
+kļūsiet
+# verbs
+varēt
+varēju
+varējām
+varēšu
+varēsim
+var
+varēji
+varējāt
+varēsi
+varēsiet
+varat
+varēja
+varēs
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_nl.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_nl.txt

new file mode 100644 (file)

index 0000000..f4d61f5
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_nl.txt
@@ -0,0 +1,117 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+
+ | A Dutch stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large sample of Dutch text.
+
+ | Dutch stop words frequently exhibit homonym clashes. These are indicated
+ | clearly below.
+
+de             |  the
+en             |  and
+van            |  of, from
+ik             |  I, the ego
+te             |  (1) chez, at etc, (2) to, (3) too
+dat            |  that, which
+die            |  that, those, who, which
+in             |  in, inside
+een            |  a, an, one
+hij            |  he
+het            |  the, it
+niet           |  not, nothing, naught
+zijn           |  (1) to be, being, (2) his, one's, its
+is             |  is
+was            |  (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
+op             |  on, upon, at, in, up, used up
+aan            |  on, upon, to (as dative)
+met            |  with, by
+als            |  like, such as, when
+voor           |  (1) before, in front of, (2) furrow
+had            |  had, past tense all persons sing. of 'hebben' (have)
+er             |  there
+maar           |  but, only
+om             |  round, about, for etc
+hem            |  him
+dan            |  then
+zou            |  should/would, past tense all persons sing. of 'zullen'
+of             |  or, whether, if
+wat            |  what, something, anything
+mijn           |  possessive and noun 'mine'
+men            |  people, 'one'
+dit            |  this
+zo             |  so, thus, in this way
+door           |  through, by
+over           |  over, across
+ze             |  she, her, they, them
+zich           |  oneself
+bij            |  (1) a bee, (2) by, near, at
+ook            |  also, too
+tot            |  till, until
+je             |  you
+mij            |  me
+uit            |  out of, from
+der            |  Old Dutch form of 'van der' still found in surnames
+daar           |  (1) there, (2) because
+haar           |  (1) her, their, them, (2) hair
+naar           |  (1) unpleasant, unwell etc, (2) towards, (3) as
+heb            |  present first person sing. of 'to have'
+hoe            |  how, why
+heeft          |  present third person sing. of 'to have'
+hebben         |  'to have' and various parts thereof
+deze           |  this
+u              |  you
+want           |  (1) for, (2) mitten, (3) rigging
+nog            |  yet, still
+zal            |  'shall', first and third person sing. of verb 'zullen' (will)
+me             |  me
+zij            |  she, they
+nu             |  now
+ge             |  'thou', still used in Belgium and south Netherlands
+geen           |  none
+omdat          |  because
+iets           |  something, somewhat
+worden         |  to become, grow, get
+toch           |  yet, still
+al             |  all, every, each
+waren          |  (1) 'were', (2) to wander, (3) wares
+veel           |  much, many
+meer           |  (1) more, (2) lake
+doen           |  to do, to make
+toen           |  then, when
+moet           |  noun 'spot/mote' and present form of 'to must'
+ben            |  (1) am, (2) 'are' in interrogative second person singular of 'to be'
+zonder         |  without
+kan            |  noun 'can' and present form of 'to be able'
+hun            |  their, them
+dus            |  so, consequently
+alles          |  all, everything, anything
+onder          |  under, beneath
+ja             |  yes, of course
+eens           |  once, one day
+hier           |  here
+wie            |  who
+werd           |  imperfect third person sing. of 'become'
+altijd         |  always
+doch           |  yet, but etc
+wordt          |  present third person sing. of 'become'
+wezen          |  (1) to be, (2) 'been' as in 'been fishing', (3) orphans
+kunnen         |  to be able
+ons            |  us/our
+zelf           |  self
+tegen          |  against, towards, at
+na             |  after, near
+reeds          |  already
+wil            |  (1) present tense of 'want', (2) 'will', noun, (3) fender
+kon            |  could; past tense of 'to be able'
+niets          |  nothing
+uw             |  your
+iemand         |  somebody
+geweest        |  been; past participle of 'be'
+andere         |  other
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_no.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_no.txt

new file mode 100644 (file)

index 0000000..e76f36e
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_no.txt
@@ -0,0 +1,192 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+
+ | A Norwegian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This stop word list is for the dominant bokmål dialect. Words unique
+ | to nynorsk are marked *.
+
+ | Revised by Jan Bruusgaard <Jan.Bruusgaard@ssb.no>, Jan 2005
+
+og             | and
+i              | in
+jeg            | I
+det            | it/this/that
+at             | to (w. inf.)
+en             | a/an
+et             | a/an
+den            | it/this/that
+til            | to
+er             | is/am/are
+som            | who/that
+på             | on
+de             | they / you(formal)
+med            | with
+han            | he
+av             | of
+ikke           | not
+ikkje          | not *
+der            | there
+så             | so
+var            | was/were
+meg            | me
+seg            | you
+men            | but
+ett            | one
+har            | have
+om             | about
+vi             | we
+min            | my
+mitt           | my
+ha             | have
+hadde          | had
+hun            | she
+nå             | now
+over           | over
+da             | when/as
+ved            | by/know
+fra            | from
+du             | you
+ut             | out
+sin            | your
+dem            | them
+oss            | us
+opp            | up
+man            | you/one
+kan            | can
+hans           | his
+hvor           | where
+eller          | or
+hva            | what
+skal           | shall/must
+selv           | self (reflective)
+sjøl           | self (reflective)
+her            | here
+alle           | all
+vil            | will
+bli            | become
+ble            | became
+blei           | became *
+blitt          | have become
+kunne          | could
+inn            | in
+når            | when
+være           | be
+kom            | come
+noen           | some
+noe            | some
+ville          | would
+dere           | you
+som            | who/which/that
+deres          | their/theirs
+kun            | only/just
+ja             | yes
+etter          | after
+ned            | down
+skulle         | should
+denne          | this
+for            | for/because
+deg            | you
+si             | hers/his
+sine           | hers/his
+sitt           | hers/his
+mot            | against
+å              | to
+meget          | much
+hvorfor        | why
+dette          | this
+disse          | these/those
+uten           | without
+hvordan        | how
+ingen          | none
+din            | your
+ditt           | your
+blir           | become
+samme          | same
+hvilken        | which
+hvilke         | which (plural)
+sånn           | such a
+inni           | inside/within
+mellom         | between
+vår            | our
+hver           | each
+hvem           | who
+vors           | us/ours
+hvis           | whose
+både           | both
+bare           | only/just
+enn            | than
+fordi          | as/because
+før            | before
+mange          | many
+også           | also
+slik           | just
+vært           | been
+være           | to be
+båe            | both *
+begge          | both
+siden          | since
+dykk           | your *
+dykkar         | yours *
+dei            | they *
+deira          | them *
+deires         | theirs *
+deim           | them *
+di             | your (fem.) *
+då             | as/when *
+eg             | I *
+ein            | a/an *
+eit            | a/an *
+eitt           | a/an *
+elles          | or *
+honom          | he *
+hjå            | at *
+ho             | she *
+hoe            | she *
+henne          | her
+hennar         | her/hers
+hennes         | hers
+hoss           | how *
+hossen         | how *
+ikkje          | not *
+ingi           | noone *
+inkje          | noone *
+korleis        | how *
+korso          | how *
+kva            | what/which *
+kvar           | where *
+kvarhelst      | where *
+kven           | who/whom *
+kvi            | why *
+kvifor         | why *
+me             | we *
+medan          | while *
+mi             | my *
+mine           | my *
+mykje          | much *
+no             | now *
+nokon          | some (masc./neut.) *
+noka           | some (fem.) *
+nokor          | some *
+noko           | some *
+nokre          | some *
+si             | his/hers *
+sia            | since *
+sidan          | since *
+so             | so *
+somt           | some *
+somme          | some *
+um             | about *
+upp            | up *
+vere           | be *
+vore           | was *
+verte          | become *
+vort           | become *
+varte          | became *
+vart           | became *
+
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_pt.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_pt.txt

new file mode 100644 (file)

index 0000000..276c1b4
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_pt.txt
@@ -0,0 +1,251 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+
+ | A Portuguese stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de             |  of, from
+a              |  the; to, at; her
+o              |  the; him
+que            |  who, that
+e              |  and
+do             |  de + o
+da             |  de + a
+em             |  in
+um             |  a
+para           |  for
+  | é          from SER
+com            |  with
+não            |  not, no
+uma            |  a
+os             |  the; them
+no             |  em + o
+se             |  himself etc
+na             |  em + a
+por            |  for
+mais           |  more
+as             |  the; them
+dos            |  de + os
+como           |  as, like
+mas            |  but
+  | foi        from SER
+ao             |  a + o
+ele            |  he
+das            |  de + as
+  | tem        from TER
+à              |  a + a
+seu            |  his
+sua            |  her
+ou             |  or
+  | ser        from SER
+quando         |  when
+muito          |  much
+  | há         from HAV
+nos            |  em + os; us
+já             |  already, now
+  | está       from EST
+eu             |  I
+também         |  also
+só             |  only, just
+pelo           |  per + o
+pela           |  per + a
+até            |  up to
+isso           |  that
+ela            |  she
+entre          |  between
+  | era        from SER
+depois         |  after
+sem            |  without
+mesmo          |  same
+aos            |  a + os
+  | ter        from TER
+seus           |  his
+quem           |  whom
+nas            |  em + as
+me             |  me
+esse           |  that
+eles           |  they
+  | estão      from EST
+você           |  you
+  | tinha      from TER
+  | foram      from SER
+essa           |  that
+num            |  em + um
+nem            |  nor
+suas           |  her
+meu            |  my
+às             |  a + as
+minha          |  my
+  | têm        from TER
+numa           |  em + uma
+pelos          |  per + os
+elas           |  they
+  | havia      from HAV
+  | seja       from SER
+qual           |  which
+  | será       from SER
+nós            |  we
+  | tenho      from TER
+lhe            |  to him, her
+deles          |  of them
+essas          |  those
+esses          |  those
+pelas          |  per + as
+este           |  this
+  | fosse      from SER
+dele           |  of him
+
+ | other words. There are many contractions such as naquele = em+aquele,
+ | mo = me+o, but they are rare.
+ | Indefinite article plural forms are also rare.
+
+tu             |  thou
+te             |  thee
+vocês          |  you (plural)
+vos            |  you
+lhes           |  to them
+meus           |  my
+minhas
+teu            |  thy
+tua
+teus
+tuas
+nosso          | our
+nossa
+nossos
+nossas
+
+dela           |  of her
+delas          |  of them
+
+esta           |  this
+estes          |  these
+estas          |  these
+aquele         |  that
+aquela         |  that
+aqueles        |  those
+aquelas        |  those
+isto           |  this
+aquilo         |  that
+
+               | forms of estar, to be (not including the infinitive):
+estou
+está
+estamos
+estão
+estive
+esteve
+estivemos
+estiveram
+estava
+estávamos
+estavam
+estivera
+estivéramos
+esteja
+estejamos
+estejam
+estivesse
+estivéssemos
+estivessem
+estiver
+estivermos
+estiverem
+
+               | forms of haver, to have (not including the infinitive):
+hei
+há
+havemos
+hão
+houve
+houvemos
+houveram
+houvera
+houvéramos
+haja
+hajamos
+hajam
+houvesse
+houvéssemos
+houvessem
+houver
+houvermos
+houverem
+houverei
+houverá
+houveremos
+houverão
+houveria
+houveríamos
+houveriam
+
+               | forms of ser, to be (not including the infinitive):
+sou
+somos
+são
+era
+éramos
+eram
+fui
+foi
+fomos
+foram
+fora
+fôramos
+seja
+sejamos
+sejam
+fosse
+fôssemos
+fossem
+for
+formos
+forem
+serei
+será
+seremos
+serão
+seria
+seríamos
+seriam
+
+               | forms of ter, to have (not including the infinitive):
+tenho
+tem
+temos
+têm
+tinha
+tínhamos
+tinham
+tive
+teve
+tivemos
+tiveram
+tivera
+tivéramos
+tenha
+tenhamos
+tenham
+tivesse
+tivéssemos
+tivessem
+tiver
+tivermos
+tiverem
+terei
+terá
+teremos
+terão
+teria
+teríamos
+teriam
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ro.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ro.txt

new file mode 100644 (file)

index 0000000..4fdee90
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ro.txt
@@ -0,0 +1,233 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+acea
+aceasta
+această
+aceea
+acei
+aceia
+acel
+acela
+acele
+acelea
+acest
+acesta
+aceste
+acestea
+aceşti
+aceştia
+acolo
+acum
+ai
+aia
+aibă
+aici
+al
+ăla
+ale
+alea
+ălea
+altceva
+altcineva
+am
+ar
+are
+aş
+aşadar
+asemenea
+asta
+ăsta
+astăzi
+astea
+ăstea
+ăştia
+asupra
+aţi
+au
+avea
+avem
+aveţi
+azi
+bine
+bucur
+bună
+ca
+că
+căci
+când
+care
+cărei
+căror
+cărui
+cât
+câte
+câţi
+către
+câtva
+ce
+cel
+ceva
+chiar
+cînd
+cine
+cineva
+cît
+cîte
+cîţi
+cîtva
+contra
+cu
+cum
+cumva
+curând
+curînd
+da
+dă
+dacă
+dar
+datorită
+de
+deci
+deja
+deoarece
+departe
+deşi
+din
+dinaintea
+dintr
+dintre
+drept
+după
+ea
+ei
+el
+ele
+eram
+este
+eşti
+eu
+face
+fără
+fi
+fie
+fiecare
+fii
+fim
+fiţi
+iar
+ieri
+îi
+îl
+îmi
+împotriva
+în 
+înainte
+înaintea
+încât
+încît
+încotro
+între
+întrucât
+întrucît
+îţi
+la
+lângă
+le
+li
+lîngă
+lor
+lui
+mă
+mâine
+mea
+mei
+mele
+mereu
+meu
+mi
+mine
+mult
+multă
+mulţi
+ne
+nicăieri
+nici
+nimeni
+nişte
+noastră
+noastre
+noi
+noştri
+nostru
+nu
+ori
+oricând
+oricare
+oricât
+orice
+oricînd
+oricine
+oricît
+oricum
+oriunde
+până
+pe
+pentru
+peste
+pînă
+poate
+pot
+prea
+prima
+primul
+prin
+printr
+sa
+să
+săi
+sale
+sau
+său
+se
+şi
+sînt
+sîntem
+sînteţi
+spre
+sub
+sunt
+suntem
+sunteţi
+ta
+tăi
+tale
+tău
+te
+ţi
+ţie
+tine
+toată
+toate
+tot
+toţi
+totuşi
+tu
+un
+una
+unde
+undeva
+unei
+unele
+uneori
+unor
+vă
+vi
+voastră
+voastre
+voi
+voştri
+vostru
+vouă
+vreo
+vreun
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ru.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ru.txt

new file mode 100644 (file)

index 0000000..6430769
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ru.txt
@@ -0,0 +1,241 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+
+ | a russian stop word list. comments begin with vertical bar. each stop
+ | word is at the start of a line.
+
+ | this is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | letter `ё' is translated to `е'.
+
+и              | and
+в              | in/into
+во             | alternative form
+не             | not
+что            | what/that
+он             | he
+на             | on/onto
+я              | i
+с              | from
+со             | alternative form
+как            | how
+а              | milder form of `no' (but)
+то             | conjunction and form of `that'
+все            | all
+она            | she
+так            | so, thus
+его            | him
+но             | but
+да             | yes/and
+ты             | thou
+к              | towards, by
+у              | around, chez
+же             | intensifier particle
+вы             | you
+за             | beyond, behind
+бы             | conditional/subj. particle
+по             | up to, along
+только         | only
+ее             | her
+мне            | to me
+было           | it was
+вот            | here is/are, particle
+от             | away from
+меня           | me
+еще            | still, yet, more
+нет            | no, there isn't/aren't
+о              | about
+из             | out of
+ему            | to him
+теперь         | now
+когда          | when
+даже           | even
+ну             | so, well
+вдруг          | suddenly
+ли             | interrogative particle
+если           | if
+уже            | already, but homonym of `narrower'
+или            | or
+ни             | neither
+быть           | to be
+был            | he was
+него           | prepositional form of его
+до             | up to
+вас            | you accusative
+нибудь         | indef. suffix preceded by hyphen
+опять          | again
+уж             | already, but homonym of `adder'
+вам            | to you
+сказал         | he said
+ведь           | particle `after all'
+там            | there
+потом          | then
+себя           | oneself
+ничего         | nothing
+ей             | to her
+может          | usually with `быть' as `maybe'
+они            | they
+тут            | here
+где            | where
+есть           | there is/are
+надо           | got to, must
+ней            | prepositional form of  ей
+для            | for
+мы             | we
+тебя           | thee
+их             | them, their
+чем            | than
+была           | she was
+сам            | self
+чтоб           | in order to
+без            | without
+будто          | as if
+человек        | man, person, one
+чего           | genitive form of `what'
+раз            | once
+тоже           | also
+себе           | to oneself
+под            | beneath
+жизнь          | life
+будет          | will be
+ж              | short form of intensifier particle `же'
+тогда          | then
+кто            | who
+этот           | this
+говорил        | was saying
+того           | genitive form of `that'
+потому         | for that reason
+этого          | genitive form of `this'
+какой          | which
+совсем         | altogether
+ним            | prepositional form of `его', `они'
+здесь          | here
+этом           | prepositional form of `этот'
+один           | one
+почти          | almost
+мой            | my
+тем            | instrumental/dative plural of `тот', `то'
+чтобы          | full form of `in order that'
+нее            | her (acc.)
+кажется        | it seems
+сейчас         | now
+были           | they were
+куда           | where to
+зачем          | why
+сказать        | to say
+всех           | all (acc., gen. preposn. plural)
+никогда        | never
+сегодня        | today
+можно          | possible, one can
+при            | by
+наконец        | finally
+два            | two
+об             | alternative form of `о', about
+другой         | another
+хоть           | even
+после          | after
+над            | above
+больше         | more
+тот            | that one (masc.)
+через          | across, in
+эти            | these
+нас            | us
+про            | about
+всего          | in all, only, of all
+них            | prepositional form of `они' (they)
+какая          | which, feminine
+много          | lots
+разве          | interrogative particle
+сказала        | she said
+три            | three
+эту            | this, acc. fem. sing.
+моя            | my, feminine
+впрочем        | moreover, besides
+хорошо         | good
+свою           | one's own, acc. fem. sing.
+этой           | oblique form of `эта', fem. `this'
+перед          | in front of
+иногда         | sometimes
+лучше          | better
+чуть           | a little
+том            | preposn. form of `that one'
+нельзя         | one must not
+такой          | such a one
+им             | to them
+более          | more
+всегда         | always
+конечно        | of course
+всю            | acc. fem. sing of `all'
+между          | between
+
+
+  | b: some paradigms
+  |
+  | personal pronouns
+  |
+  | я  меня  мне  мной  [мною]
+  | ты  тебя  тебе  тобой  [тобою]
+  | он  его  ему  им  [него, нему, ним]
+  | она  ее  ей  ею  [нее, ней, нею]
+  | оно  его  ему  им  [него, нему, ним]
+  |
+  | мы  нас  нам  нами
+  | вы  вас  вам  вами
+  | они  их  им  ими  [них, ним, ними]
+  |
+  |   себя  себе  собой   [собою]
+  |
+  | demonstrative pronouns: этот (this), тот (that)
+  |
+  | этот  эта  это  эти
+  | этого  эту  это  эти
+  | этого  этой  этого  этих
+  | этому  этой  этому  этим
+  | этим  этой  этим  [этою]  этими
+  | этом  этой  этом  этих
+  |
+  | тот  та  то  те
+  | того  ту  то  те
+  | того  той  того  тех
+  | тому  той  тому  тем
+  | тем  той  тем  [тою]  теми
+  | том  той  том  тех
+  |
+  | determinative pronouns
+  |
+  | (a) весь (all)
+  |
+  | весь  вся  все  все
+  | всего  всю  все  все
+  | всего  всей  всего  всех
+  | всему  всей  всему  всем
+  | всем  всей  всем  [всею]  всеми
+  | всем  всей  всем  всех
+  |
+  | (b) сам (himself etc)
+  |
+  | сам  сама  само  сами
+  | самого саму  само  самих
+  | самого самой самого  самих
+  | самому самой самому  самим
+  | самим  самой  самим  [самою]  самими
+  | самом самой самом  самих
+  |
+  | stems of verbs `to be', `to have', `to do' and modal
+  |
+  | быть  бы  буд  быв  есть  суть
+  | име
+  | дел
+  | мог   мож  мочь
+  | уме
+  | хоч  хот
+  | долж
+  | можн
+  | нужн
+  | нельзя
+
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_sv.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_sv.txt

new file mode 100644 (file)

index 0000000..22bddfd
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_sv.txt
@@ -0,0 +1,131 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+
+ | A Swedish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | Swedish stop words occasionally exhibit homonym clashes. For example
+ |  så = so, but also seed. These are indicated clearly below.
+
+och            | and
+det            | it, this/that
+att            | to (with infinitive)
+i              | in, at
+en             | a
+jag            | I
+hon            | she
+som            | who, that
+han            | he
+på             | on
+den            | it, this/that
+med            | with
+var            | where, each
+sig            | him(self) etc
+för            | for
+så             | so (also: seed)
+till           | to
+är             | is
+men            | but
+ett            | a
+om             | if; around, about
+hade           | had
+de             | they, these/those
+av             | of
+icke           | not, no
+mig            | me
+du             | you
+henne          | her
+då             | then, when
+sin            | his
+nu             | now
+har            | have
+inte           | inte någon = no one
+hans           | his
+honom          | him
+skulle         | should, would
+hennes         | her
+där            | there
+min            | my
+man            | one (pronoun)
+ej             | nor
+vid            | at, by, on (also: vast)
+kunde          | could
+något          | some etc
+från           | from, off
+ut             | out
+när            | when
+efter          | after, behind
+upp            | up
+vi             | we
+dem            | them
+vara           | be
+vad            | what
+över           | over
+än             | than
+dig            | you
+kan            | can
+sina           | his
+här            | here
+ha             | have
+mot            | towards
+alla           | all
+under          | under (also: wonder)
+någon          | some etc
+eller          | or (else)
+allt           | all
+mycket         | much
+sedan          | since
+ju             | why
+denna          | this/that
+själv          | myself, yourself etc
+detta          | this/that
+åt             | to
+utan           | without
+varit          | was
+hur            | how
+ingen          | no
+mitt           | my
+ni             | you
+bli            | to be, become
+blev           | from bli
+oss            | us
+din            | thy
+dessa          | these/those
+några          | some etc
+deras          | their
+blir           | from bli
+mina           | my
+samma          | (the) same
+vilken         | who, that
+er             | you, your
+sådan          | such a
+vår            | our
+blivit         | from bli
+dess           | its
+inom           | within
+mellan         | between
+sådant         | such a
+varför         | why
+varje          | each
+vilka          | who, that
+ditt           | thy
+vem            | who
+vilket         | who, that
+sitta          | his
+sådana         | such a
+vart           | each
+dina           | thy
+vars           | whose
+vårt           | our
+våra           | our
+ert            | your
+era            | your
+vilkas         | whose
+
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_th.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_th.txt

new file mode 100644 (file)

index 0000000..07f0fab
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_th.txt
@@ -0,0 +1,119 @@
+# Thai stopwords from:
+# "Opinion Detection in Thai Political News Columns
+# Based on Subjectivity Analysis"
+# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
+ไว้
+ไม่
+ไป
+ได้
+ให้
+ใน
+โดย
+แห่ง
+แล้ว
+และ
+แรก
+แบบ
+แต่
+เอง
+เห็น
+เลย
+เริ่ม
+เรา
+เมื่อ
+เพื่อ
+เพราะ
+เป็นการ
+เป็น
+เปิดเผย
+เปิด
+เนื่องจาก
+เดียวกัน
+เดียว
+เช่น
+เฉพาะ
+เคย
+เข้า
+เขา
+อีก
+อาจ
+อะไร
+ออก
+อย่าง
+อยู่
+อยาก
+หาก
+หลาย
+หลังจาก
+หลัง
+หรือ
+หนึ่ง
+ส่วน
+ส่ง
+สุด
+สําหรับ
+ว่า
+วัน
+ลง
+ร่วม
+ราย
+รับ
+ระหว่าง
+รวม
+ยัง
+มี
+มาก
+มา
+พร้อม
+พบ
+ผ่าน
+ผล
+บาง
+น่า
+นี้
+นํา
+นั้น
+นัก
+นอกจาก
+ทุก
+ที่สุด
+ที่
+ทําให้
+ทํา
+ทาง
+ทั้งนี้
+ทั้ง
+ถ้า
+ถูก
+ถึง
+ต้อง
+ต่างๆ
+ต่าง
+ต่อ
+ตาม
+ตั้งแต่
+ตั้ง
+ด้าน
+ด้วย
+ดัง
+ซึ่ง
+ช่วง
+จึง
+จาก
+จัด
+จะ
+คือ
+ความ
+ครั้ง
+คง
+ขึ้น
+ของ
+ขอ
+ขณะ
+ก่อน
+ก็
+การ
+กับ
+กัน
+กว่า
+กล่าว
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_tr.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_tr.txt

new file mode 100644 (file)

index 0000000..84d9408
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_tr.txt
@@ -0,0 +1,212 @@
+# Turkish stopwords from LUCENE-559
+# merged with the list from "Information Retrieval on Turkish Texts"
+#   (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
+acaba
+altmış
+altı
+ama
+ancak
+arada
+aslında
+ayrıca
+bana
+bazı
+belki
+ben
+benden
+beni
+benim
+beri
+beş
+bile
+bin
+bir
+birçok
+biri
+birkaç
+birkez
+birşey
+birşeyi
+biz
+bize
+bizden
+bizi
+bizim
+böyle
+böylece
+bu
+buna
+bunda
+bundan
+bunlar
+bunları
+bunların
+bunu
+bunun
+burada
+çok
+çünkü
+da
+daha
+dahi
+de
+defa
+değil
+diğer
+diye
+doksan
+dokuz
+dolayı
+dolayısıyla
+dört
+edecek
+eden
+ederek
+edilecek
+ediliyor
+edilmesi
+ediyor
+eğer
+elli
+en
+etmesi
+etti
+ettiği
+ettiğini
+gibi
+göre
+halen
+hangi
+hatta
+hem
+henüz
+hep
+hepsi
+her
+herhangi
+herkesin
+hiç
+hiçbir
+için
+iki
+ile
+ilgili
+ise
+işte
+itibaren
+itibariyle
+kadar
+karşın
+katrilyon
+kendi
+kendilerine
+kendini
+kendisi
+kendisine
+kendisini
+kez
+ki
+kim
+kimden
+kime
+kimi
+kimse
+kırk
+milyar
+milyon
+mu
+mü
+mı
+nasıl
+ne
+neden
+nedenle
+nerde
+nerede
+nereye
+niye
+niçin
+o
+olan
+olarak
+oldu
+olduğu
+olduğunu
+olduklarını
+olmadı
+olmadığı
+olmak
+olması
+olmayan
+olmaz
+olsa
+olsun
+olup
+olur
+olursa
+oluyor
+on
+ona
+ondan
+onlar
+onlardan
+onları
+onların
+onu
+onun
+otuz
+oysa
+öyle
+pek
+rağmen
+sadece
+sanki
+sekiz
+seksen
+sen
+senden
+seni
+senin
+siz
+sizden
+sizi
+sizin
+şey
+şeyden
+şeyi
+şeyler
+şöyle
+şu
+şuna
+şunda
+şundan
+şunları
+şunu
+tarafından
+trilyon
+tüm
+üç
+üzere
+var
+vardı
+ve
+veya
+ya
+yani
+yapacak
+yapılan
+yapılması
+yapıyor
+yapmak
+yaptı
+yaptığı
+yaptığını
+yaptıkları
+yedi
+yerine
+yetmiş
+yine
+yirmi
+yoksa
+yüz
+zaten
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/lang/userdict_ja.txt b/zookeeper/example-schemaless/solr/collection1/conf/lang/userdict_ja.txt

new file mode 100644 (file)

index 0000000..6f0368e
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/lang/userdict_ja.txt
@@ -0,0 +1,29 @@
+#
+# This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
+#
+# Add entries to this file in order to override the statistical model in terms
+# of segmentation, readings and part-of-speech tags.  Notice that entries do
+# not have weights since they are always used when found.  This is by-design
+# in order to maximize ease-of-use.
+#
+# Entries are defined using the following CSV format:
+#  <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag>
+#
+# Notice that a single half-width space separates tokens and readings, and
+# that the number of tokens and readings must match exactly.
+#
+# Also notice that the behavior of multiple entries with the same <text> is undefined.
+#
+# Whitespace only lines are ignored.  Comments are not allowed on entry lines.
+#
+
+# Custom segmentation for kanji compounds
+日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞
+関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞
+
+# Custom segmentation for compound katakana
+トートバッグ,トート バッグ,トート バッグ,かずカナ名詞
+ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞
+
+# Custom reading for former sumo wrestler
+朝青龍,朝青龍,アサショウリュウ,カスタム人名
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/protwords.txt b/zookeeper/example-schemaless/solr/collection1/conf/protwords.txt

new file mode 100644 (file)

index 0000000..1dfc0ab
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/protwords.txt
@@ -0,0 +1,21 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# Use a protected word file to protect against the stemmer reducing two
+# unrelated words to the same base word.
+
+# Some non-words that normally won't be encountered,
+# just to test that they won't be stemmed.
+dontstems
+zwhacky
+
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/schema.xml b/zookeeper/example-schemaless/solr/collection1/conf/schema.xml

new file mode 100644 (file)

index 0000000..a157715
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/schema.xml
@@ -0,0 +1,1072 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--  
+ This is the Solr schema file. This file should be named "schema.xml" and
+ should be in the conf directory under the solr home
+ (i.e. ./solr/conf/schema.xml by default) 
+ or located where the classloader for the Solr webapp can find it.
+
+ This example schema is the recommended starting point for users.
+ It should be kept correct and concise, usable out-of-the-box.
+
+ For more information on how to customize this file, please see
+ http://wiki.apache.org/solr/SchemaXml
+
+ PERFORMANCE NOTE: this schema includes many optional features and should not
+ be used for benchmarking.  To improve performance one could
+  - set stored="false" for all fields possible (esp large fields) when you
+    only need to search on the field but don't need to return the original
+    value.
+  - set indexed="false" if you don't need to search on the field, but only
+    return the field as a result of searching on other indexed fields.
+  - remove all unneeded copyField statements
+  - for best index size and searching performance, set "indexed" to false
+    for all general text fields, use copyField to copy them to the
+    catchall "text" field, and use that for searching.
+  - For maximum indexing performance, use the StreamingUpdateSolrServer
+    java client.
+  - Remember to run the JVM in server mode, and use a higher logging level
+    that avoids logging every request
+-->
+
+<schema name="example-schemaless" version="1.5">
+  <!-- attribute "name" is the name of this schema and is only used for display purposes.
+       version="x.y" is Solr's version number for the schema syntax and 
+       semantics.  It should not normally be changed by applications.
+
+       1.0: multiValued attribute did not exist, all fields are multiValued 
+            by nature
+       1.1: multiValued attribute introduced, false by default 
+       1.2: omitTermFreqAndPositions attribute introduced, true by default 
+            except for text fields.
+       1.3: removed optional field compress feature
+       1.4: autoGeneratePhraseQueries attribute introduced to drive QueryParser
+            behavior when a single string produces multiple tokens.  Defaults 
+            to off for version >= 1.4
+       1.5: omitNorms defaults to true for primitive field types 
+            (int, float, boolean, string...)
+     -->
+
+  <fields>
+    <!-- Valid attributes for fields:
+     name: mandatory - the name for the field
+     type: mandatory - the name of a field type from the 
+       <types> fieldType section
+     indexed: true if this field should be indexed (searchable or sortable)
+     stored: true if this field should be retrievable
+     docValues: true if this field should have doc values. Doc values are
+       useful for faceting, grouping, sorting and function queries. Although not
+       required, doc values will make the index faster to load, more
+       NRT-friendly and more memory-efficient. They however come with some
+       limitations: they are currently only supported by StrField, UUIDField
+       and all Trie*Fields, and depending on the field type, they might
+       require the field to be single-valued, be required or have a default
+       value (check the documentation of the field type you're interested in
+       for more information)
+     multiValued: true if this field may contain multiple values per document
+     omitNorms: (expert) set to true to omit the norms associated with
+       this field (this disables length normalization and index-time
+       boosting for the field, and saves some memory).  Only full-text
+       fields or fields that need an index-time boost need norms.
+       Norms are omitted for primitive (non-analyzed) types by default.
+     termVectors: [false] set to true to store the term vector for a
+       given field.
+       When using MoreLikeThis, fields used for similarity should be
+       stored for best performance.
+     termPositions: Store position information with the term vector.  
+       This will increase storage costs.
+     termOffsets: Store offset information with the term vector. This 
+       will increase storage costs.
+     required: The field is required.  It will throw an error if the
+       value does not exist
+     default: a value that should be used if no value is specified
+       when adding a document.
+   -->
+
+    <!-- field names should consist of alphanumeric or underscore characters only and
+      not start with a digit.  This is not currently strictly enforced,
+      but other field names will not have first class support from all components
+      and back compatibility is not guaranteed.  Names with both leading and
+      trailing underscores (e.g. _version_) are reserved.
+   -->
+
+    <!-- In this "schemaless" example, only two fields are pre-declared: id and _version_.
+         All other fields will be type guessed and added via the
+         "add-unknown-fields-to-the-schema" update request processor chain declared 
+         in solrconfig.xml.
+      -->
+    <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
+    <field name="_version_" type="long" indexed="true" stored="true"/>
+
+
+    <!-- Dynamic field definitions allow using convention over configuration
+       for fields via the specification of patterns to match field names. 
+       EXAMPLE:  name="*_i" will match any field ending in _i (like myid_i, z_i)
+       RESTRICTION: the glob-like pattern in the name attribute must have
+       a "*" only at the start or the end.  -->
+   
+    <dynamicField name="*_i"  type="int"    indexed="true"  stored="true"/>
+    <dynamicField name="*_is" type="int"    indexed="true"  stored="true"  multiValued="true"/>
+    <dynamicField name="*_s"  type="string"  indexed="true"  stored="true" />
+    <dynamicField name="*_ss" type="string"  indexed="true"  stored="true" multiValued="true"/>
+    <dynamicField name="*_l"  type="long"   indexed="true"  stored="true"/>
+    <dynamicField name="*_ls" type="long"   indexed="true"  stored="true"  multiValued="true"/>
+    <dynamicField name="*_t"  type="text_general"    indexed="true"  stored="true"/>
+    <dynamicField name="*_txt" type="text_general"   indexed="true"  stored="true" multiValued="true"/>
+    <dynamicField name="*_en"  type="text_en"    indexed="true"  stored="true" multiValued="true"/>
+    <dynamicField name="*_b"  type="boolean" indexed="true" stored="true"/>
+    <dynamicField name="*_bs" type="boolean" indexed="true" stored="true"  multiValued="true"/>
+    <dynamicField name="*_f"  type="float"  indexed="true"  stored="true"/>
+    <dynamicField name="*_fs" type="float"  indexed="true"  stored="true"  multiValued="true"/>
+    <dynamicField name="*_d"  type="double" indexed="true"  stored="true"/>
+    <dynamicField name="*_ds" type="double" indexed="true"  stored="true"  multiValued="true"/>
+
+    <!-- Type used to index the lat and lon components for the "location" FieldType -->
+    <dynamicField name="*_coordinate"  type="tdouble" indexed="true"  stored="false" />
+
+    <dynamicField name="*_dt"  type="date"    indexed="true"  stored="true"/>
+    <dynamicField name="*_dts" type="date"    indexed="true"  stored="true" multiValued="true"/>
+    <dynamicField name="*_p"  type="location" indexed="true" stored="true"/>
+
+    <!-- some trie-coded dynamic fields for faster range queries -->
+    <dynamicField name="*_ti" type="tint"    indexed="true"  stored="true"/>
+    <dynamicField name="*_tl" type="tlong"   indexed="true"  stored="true"/>
+    <dynamicField name="*_tf" type="tfloat"  indexed="true"  stored="true"/>
+    <dynamicField name="*_td" type="tdouble" indexed="true"  stored="true"/>
+    <dynamicField name="*_tdt" type="tdate"  indexed="true"  stored="true"/>
+
+    <dynamicField name="*_pi"  type="pint"    indexed="true"  stored="true"/>
+    <dynamicField name="*_c"   type="currency" indexed="true"  stored="true"/>
+
+    <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
+    <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
+
+    <dynamicField name="random_*" type="random" />
+
+    <!-- uncomment the following to ignore any fields that don't already match an existing 
+        field name or dynamic field, rather than reporting them as an error. 
+        alternately, change the type="ignored" to some other type e.g. "text" if you want 
+        unknown fields indexed and/or stored by default 
+        
+        NB: use of "*" dynamic fields will disable field type guessing and adding
+        unknown fields to the schema. --> 
+    <!--dynamicField name="*" type="ignored" multiValued="true" /-->
+   
+  </fields>
+
+
+  <!-- Field to use to determine and enforce document uniqueness. 
+      Unless this field is marked with required="false", it will be a required field
+   -->
+  <uniqueKey>id</uniqueKey>
+
+  <!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when
+  parsing a query string that isn't explicit about the field.  Machine (non-user)
+  generated queries are best made explicit, or they can use the "df" request parameter
+  which takes precedence over this.
+  Note: Un-commenting defaultSearchField will be insufficient if your request handler
+  in solrconfig.xml defines "df", which takes precedence. That would need to be removed.
+ <defaultSearchField>text</defaultSearchField> -->
+
+  <!-- DEPRECATED: The defaultOperator (AND|OR) is consulted by various query parsers
+  when parsing a query string to determine if a clause of the query should be marked as
+  required or optional, assuming the clause isn't already marked by some operator.
+  The default is OR, which is generally assumed so it is not a good idea to change it
+  globally here.  The "q.op" request parameter takes precedence over this.
+ <solrQueryParser defaultOperator="OR"/> -->
+
+  <!-- copyField commands copy one field to another at the time a document
+        is added to the index.  It's used either to index the same field differently,
+        or to add multiple fields to the same field for easier/faster searching.
+
+   <copyField source="cat" dest="text"/>
+   <copyField source="name" dest="text"/>
+   <copyField source="manu" dest="text"/>
+   <copyField source="features" dest="text"/>
+   <copyField source="includes" dest="text"/>
+   <copyField source="manu" dest="manu_exact"/>
+   -->
+
+  <!-- Copy the price into a currency enabled field (default USD)
+   <copyField source="price" dest="price_c"/>
+   -->
+
+  <!-- Text fields from SolrCell to search by default in our catch-all field
+   <copyField source="title" dest="text"/>
+   <copyField source="author" dest="text"/>
+   <copyField source="description" dest="text"/>
+   <copyField source="keywords" dest="text"/>
+   <copyField source="content" dest="text"/>
+   <copyField source="content_type" dest="text"/>
+   <copyField source="resourcename" dest="text"/>
+   <copyField source="url" dest="text"/>
+   -->
+
+  <!-- Create a string version of author for faceting
+   <copyField source="author" dest="author_s"/>
+   -->
+       
+  <!-- Above, multiple source fields are copied to the [text] field. 
+         Another way to map multiple source fields to the same 
+         destination field is to use the dynamic field syntax. 
+         copyField also supports a maxChars to copy setting.  -->
+          
+  <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
+
+  <!-- copy name to alphaNameSort, a field designed for sorting by name -->
+  <!-- <copyField source="name" dest="alphaNameSort"/> -->
+ 
+  <types>
+    <!-- field type definitions. The "name" attribute is
+       just a label to be used by field definitions.  The "class"
+       attribute and any other attributes determine the real
+       behavior of the fieldType.
+         Class names starting with "solr" refer to java classes in a
+       standard package such as org.apache.solr.analysis
+    -->
+
+    <!-- The StrField type is not analyzed, but indexed/stored verbatim.
+       It supports doc values but in that case the field needs to be
+       single-valued and either required or have a default value.
+      -->
+    <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
+
+    <!-- boolean type: "true" or "false" -->
+    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
+
+    <fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
+
+    <!-- sortMissingLast and sortMissingFirst are optional attributes that are
+         currently supported on types that are sorted internally as strings
+         and on numeric types.
+            This includes "string","boolean", and, as of 3.5 (and 4.x),
+            int, float, long, date, double, including the "Trie" variants.
+       - If sortMissingLast="true", then a sort on this field will cause documents
+         without the field to come after documents with the field,
+         regardless of the requested sort order (asc or desc).
+       - If sortMissingFirst="true", then a sort on this field will cause documents
+         without the field to come before documents with the field,
+         regardless of the requested sort order.
+       - If sortMissingLast="false" and sortMissingFirst="false" (the default),
+         then default lucene sorting will be used which places docs without the
+         field first in an ascending sort and last in a descending sort.
+    -->    
+
+    <!--
+      Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
+
+      These fields support doc values, but they require the field to be
+      single-valued and either be required or have a default value.
+    -->
+    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
+    <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
+    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
+    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
+
+    <!--
+     Numeric field types that index each value at various levels of precision
+     to accelerate range queries when the number of values between the range
+     endpoints is large. See the javadoc for NumericRangeQuery for internal
+     implementation details.
+
+     Smaller precisionStep values (specified in bits) will lead to more tokens
+     indexed per value, slightly larger index size, and faster range queries.
+     A precisionStep of 0 disables indexing at different precision levels.
+    -->
+    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
+    <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
+    <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
+    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
+    
+    <fieldType name="tints" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
+    <fieldType name="tfloats" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
+    <fieldType name="tlongs" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
+    <fieldType name="tdoubles" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
+
+    <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
+         is a more restricted form of the canonical representation of dateTime
+         http://www.w3.org/TR/xmlschema-2/#dateTime    
+         The trailing "Z" designates UTC time and is mandatory.
+         Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
+         All other components are mandatory.
+
+         Expressions can also be used to denote calculations that should be
+         performed relative to "NOW" to determine the value, ie...
+
+               NOW/HOUR
+                  ... Round to the start of the current hour
+               NOW-1DAY
+                  ... Exactly 1 day prior to now
+               NOW/DAY+6MONTHS+3DAYS
+                  ... 6 months and 3 days in the future from the start of
+                      the current day
+                      
+         Consult the DateField javadocs for more information.
+
+         Note: For faster range queries, consider the tdate type
+      -->
+    <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
+
+    <!-- A Trie based date field for faster date range queries and date faceting. -->
+    <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
+
+    <fieldType name="tdates" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0" multiValued="true"/>
+
+
+    <!--Binary data type. The data should be sent/retrieved as Base64 encoded Strings -->
+    <fieldtype name="binary" class="solr.BinaryField"/>
+
+    <!--
+      Note:
+      These should only be used for compatibility with existing indexes (created with lucene or older Solr versions).
+      Use Trie based fields instead. As of Solr 3.5 and 4.x, Trie based fields support sortMissingFirst/Last
+      
+      Plain numeric field types that store and index the text
+      value verbatim (and hence don't correctly support range queries, since the
+      lexicographic ordering isn't equal to the numeric ordering)
+    -->
+    <fieldType name="pint" class="solr.IntField"/>
+    <fieldType name="plong" class="solr.LongField"/>
+    <fieldType name="pfloat" class="solr.FloatField"/>
+    <fieldType name="pdouble" class="solr.DoubleField"/>
+    <fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>
+
+    <!-- The "RandomSortField" is not used to store or search any
+         data.  You can declare fields of this type in your schema
+         to generate pseudo-random orderings of your docs for sorting 
+         or function purposes.  The ordering is generated based on the field
+         name and the version of the index. As long as the index version
+         remains unchanged, and the same field name is reused,
+         the ordering of the docs will be consistent.  
+         If you want different pseudo-random orderings of documents,
+         for the same version of the index, use a dynamicField and
+         change the field name in the request.
+     -->
+    <fieldType name="random" class="solr.RandomSortField" indexed="true" />
+
+    <!-- solr.TextField allows the specification of custom text analyzers
+         specified as a tokenizer and a list of token filters. Different
+         analyzers may be specified for indexing and querying.
+
+         The optional positionIncrementGap puts space between multiple fields of
+         this type on the same document, with the purpose of preventing false phrase
+         matching across fields.
+
+         For more info on customizing your analyzer chain, please see
+         http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
+     -->
+
+    <!-- One can also specify an existing Analyzer class that has a
+         default constructor via the class attribute on the analyzer element.
+         Example:
+    <fieldType name="text_greek" class="solr.TextField">
+      <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
+    </fieldType>
+    -->
+
+    <!-- A text field that only splits on whitespace for exact matching of words -->
+    <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <!-- A general text field that has reasonable, generic
+         cross-language defaults: it tokenizes with StandardTokenizer,
+        removes stop words from case-insensitive "stopwords.txt"
+        (empty by default), and down cases.  At query time only, it
+        also applies synonyms. -->
+    <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true">
+      <analyzer type="index">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
+        <!-- in this example, we will only use synonyms at query time
+        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+        -->
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <!-- A text field with defaults appropriate for English: it
+         tokenizes with StandardTokenizer, removes English stop words
+         (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and
+         finally applies Porter's stemming.  The query time analyzer
+         also applies synonyms from synonyms.txt. -->
+    <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <!-- in this example, we will only use synonyms at query time
+        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+        -->
+        <!-- Case insensitive stop word removal.
+        -->
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="lang/stopwords_en.txt"
+            />
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.EnglishPossessiveFilterFactory"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
+        <filter class="solr.EnglishMinimalStemFilterFactory"/>
+       -->
+        <filter class="solr.PorterStemFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="lang/stopwords_en.txt"
+            />
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.EnglishPossessiveFilterFactory"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
+        <filter class="solr.EnglishMinimalStemFilterFactory"/>
+       -->
+        <filter class="solr.PorterStemFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <!-- A text field with defaults appropriate for English, plus
+        aggressive word-splitting and autophrase features enabled.
+        This field is just like text_en, except it adds
+        WordDelimiterFilter to enable splitting and matching of
+        words on case-change, alpha numeric boundaries, and
+        non-alphanumeric chars.  This means certain compound word
+        cases will work, for example query "wi fi" will match
+        document "WiFi" or "wi-fi".
+        -->
+    <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+      <analyzer type="index">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <!-- in this example, we will only use synonyms at query time
+        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+        -->
+        <!-- Case insensitive stop word removal.
+        -->
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="lang/stopwords_en.txt"
+            />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.PorterStemFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="lang/stopwords_en.txt"
+            />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.PorterStemFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <!-- Less flexible matching, but fewer false matches.  Probably not ideal for product names,
+         but may be good for SKUs.  Can insert dashes in the wrong place and still match. -->
+    <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.EnglishMinimalStemFilterFactory"/>
+        <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
+             possible with WordDelimiterFilter in conjunction with stemming. -->
+        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <!-- Just like text_general except it reverses the characters of
+        each token, to enable more efficient leading wildcard queries. -->
+    <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
+                maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <!-- charFilter + WhitespaceTokenizer  -->
+    <!--
+    <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
+      <analyzer>
+        <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+      </analyzer>
+    </fieldType>
+    -->
+
+    <!-- This is an example of using the KeywordTokenizer along
+         with various TokenFilterFactories to produce a sortable field
+         that does not include some properties of the source text
+      -->
+    <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
+      <analyzer>
+        <!-- KeywordTokenizer does no actual tokenizing, so the entire
+             input string is preserved as a single token
+          -->
+        <tokenizer class="solr.KeywordTokenizerFactory"/>
+        <!-- The LowerCase TokenFilter does what you expect, which can be
+             useful when you want your sorting to be case insensitive
+          -->
+        <filter class="solr.LowerCaseFilterFactory" />
+        <!-- The TrimFilter removes any leading or trailing whitespace -->
+        <filter class="solr.TrimFilterFactory" />
+        <!-- The PatternReplaceFilter gives you the flexibility to use
+             Java Regular expression to replace any sequence of characters
+             matching a pattern with an arbitrary replacement string, 
+             which may include back references to portions of the original
+             string matched by the pattern.
+             
+             See the Java Regular Expression documentation for more
+             information on pattern and replacement string syntax.
+             
+             http://docs.oracle.com/javase/7/docs/api/java/util/regex/package-summary.html
+          -->
+        <filter class="solr.PatternReplaceFilterFactory"
+                pattern="([^a-z])" replacement="" replace="all"
+            />
+      </analyzer>
+    </fieldType>
+    
+    <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
+      </analyzer>
+    </fieldtype>
+
+    <fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <!--
+        The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
+        a token of "foo|1.4"  would be indexed as "foo" with a payload of 1.4f
+        Attributes of the DelimitedPayloadTokenFilterFactory : 
+         "delimiter" - a one character delimiter. Default is | (pipe)
+        "encoder" - how to encode the following value into a payload
+           float -> org.apache.lucene.analysis.payloads.FloatEncoder,
+           integer -> o.a.l.a.p.IntegerEncoder
+           identity -> o.a.l.a.p.IdentityEncoder
+            Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
+         -->
+        <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
+      </analyzer>
+    </fieldtype>
+
+    <!-- lowercases the entire field value, keeping it as a single token.  -->
+    <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.KeywordTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory" />
+      </analyzer>
+    </fieldType>
+
+    <!-- 
+      Example of using PathHierarchyTokenizerFactory at index time, so
+      queries for paths match documents at that path, or in descendent paths
+    -->
+    <fieldType name="descendent_path" class="solr.TextField">
+      <analyzer type="index">
+        <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.KeywordTokenizerFactory" />
+      </analyzer>
+    </fieldType>
+    <!-- 
+      Example of using PathHierarchyTokenizerFactory at query time, so
+      queries for paths match documents at that path, or in ancestor paths
+    -->
+    <fieldType name="ancestor_path" class="solr.TextField">
+      <analyzer type="index">
+        <tokenizer class="solr.KeywordTokenizerFactory" />
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
+      </analyzer>
+    </fieldType>
+
+    <!-- since fields of this type are by default not stored or indexed,
+         any data added to them will be ignored outright.  --> 
+    <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
+
+    <!-- This point type indexes the coordinates as separate fields (subFields)
+      If subFieldType is defined, it references a type, and a dynamic field
+      definition is created matching *___<typename>.  Alternately, if 
+      subFieldSuffix is defined, that is used to create the subFields.
+      Example: if subFieldType="double", then the coordinates would be
+        indexed in fields myloc_0___double,myloc_1___double.
+      Example: if subFieldSuffix="_d" then the coordinates would be indexed
+        in fields myloc_0_d,myloc_1_d
+      The subFields are an implementation detail of the fieldType, and end
+      users normally should not need to know about them.
+     -->
+    <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
+
+    <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
+    <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
+
+    <!-- An alternative geospatial field type new to Solr 4.  It supports multiValued and polygon shapes.
+      For more information about this and other Spatial fields new to Solr 4, see:
+      http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
+    -->
+    <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
+               geo="true" distErrPct="0.025" maxDistErr="0.000009" units="degrees" />
+
+    <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType
+        Parameters:
+          defaultCurrency: Specifies the default currency if none specified. Defaults to "USD"
+          precisionStep:   Specifies the precisionStep for the TrieLong field used for the amount
+          providerClass:   Lets you plug in other exchange provider backend:
+                           solr.FileExchangeRateProvider is the default and takes one parameter:
+                             currencyConfig: name of an xml file holding exchange rates
+                           solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org:
+                             ratesFileLocation: URL or path to rates JSON file (default latest.json on the web)
+                             refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60)
+   -->
+    <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" />
+             
+
+
+    <!-- some examples for different languages (generally ordered by ISO code) -->
+
+    <!-- Arabic -->
+    <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <!-- for any non-arabic -->
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" />
+        <!-- normalizes ﻯ to ﻱ, etc -->
+        <filter class="solr.ArabicNormalizationFilterFactory"/>
+        <filter class="solr.ArabicStemFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <!-- Bulgarian -->
+    <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/> 
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" /> 
+        <filter class="solr.BulgarianStemFilterFactory"/>       
+      </analyzer>
+    </fieldType>
+    
+    <!-- Catalan -->
+    <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <!-- removes l', etc -->
+        <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" />
+        <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>       
+      </analyzer>
+    </fieldType>
+    
+    <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) -->
+    <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <!-- normalize width before bigram, as e.g. half-width dakuten combine  -->
+        <filter class="solr.CJKWidthFilterFactory"/>
+        <!-- for any non-CJK -->
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.CJKBigramFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <!-- Czech -->
+    <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" />
+        <filter class="solr.CzechStemFilterFactory"/>       
+      </analyzer>
+    </fieldType>
+    
+    <!-- Danish -->
+    <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" />
+        <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>       
+      </analyzer>
+    </fieldType>
+    
+    <!-- German -->
+    <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
+        <filter class="solr.GermanNormalizationFilterFactory"/>
+        <filter class="solr.GermanLightStemFilterFactory"/>
+        <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
+        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
+      </analyzer>
+    </fieldType>
+    
+    <!-- Greek -->
+    <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <!-- greek specific lowercase for sigma -->
+        <filter class="solr.GreekLowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" />
+        <filter class="solr.GreekStemFilterFactory"/>
+      </analyzer>
+    </fieldType>
+    
+    <!-- Spanish -->
+    <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
+        <filter class="solr.SpanishLightStemFilterFactory"/>
+        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
+      </analyzer>
+    </fieldType>
+    
+    <!-- Basque -->
+    <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" />
+        <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
+      </analyzer>
+    </fieldType>
+    
+    <!-- Persian -->
+    <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <!-- for ZWNJ -->
+        <charFilter class="solr.PersianCharFilterFactory"/>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.ArabicNormalizationFilterFactory"/>
+        <filter class="solr.PersianNormalizationFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" />
+      </analyzer>
+    </fieldType>
+    
+    <!-- Finnish -->
+    <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" />
+        <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
+        <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> -->
+      </analyzer>
+    </fieldType>
+    
+    <!-- French -->
+    <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <!-- removes l', etc -->
+        <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" />
+        <filter class="solr.FrenchLightStemFilterFactory"/>
+        <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->
+        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
+      </analyzer>
+    </fieldType>
+    
+    <!-- Irish -->
+    <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <!-- removes d', etc -->
+        <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
+        <!-- removes n-, etc. position increments is intentionally false! -->
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"/>
+        <filter class="solr.IrishLowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt"/>
+        <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
+      </analyzer>
+    </fieldType>
+    
+    <!-- Galician -->
+    <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" />
+        <filter class="solr.GalicianStemFilterFactory"/>
+        <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> -->
+      </analyzer>
+    </fieldType>
+    
+    <!-- Hindi -->
+    <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <!-- normalizes unicode representation -->
+        <filter class="solr.IndicNormalizationFilterFactory"/>
+        <!-- normalizes variation in spelling -->
+        <filter class="solr.HindiNormalizationFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" />
+        <filter class="solr.HindiStemFilterFactory"/>
+      </analyzer>
+    </fieldType>
+    
+    <!-- Hungarian -->
+    <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" />
+        <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
+        <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> -->   
+      </analyzer>
+    </fieldType>
+    
+    <!-- Armenian -->
+    <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" />
+        <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
+      </analyzer>
+    </fieldType>
+    
+    <!-- Indonesian -->
+    <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" />
+        <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->
+        <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
+      </analyzer>
+    </fieldType>
+    
+    <!-- Italian -->
+    <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <!-- removes l', etc -->
+        <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" />
+        <filter class="solr.ItalianLightStemFilterFactory"/>
+        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->
+      </analyzer>
+    </fieldType>
+    
+    <!-- Japanese using morphological analysis (see text_cjk for a configuration using bigramming)
+
+         NOTE: If you want to optimize search for precision, use default operator AND in your query
+         parser config with <solrQueryParser defaultOperator="AND"/> further down in this file.  Use 
+         OR if you would like to optimize for recall (default).
+    -->
+    <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
+      <analyzer>
+        <!-- Kuromoji Japanese morphological analyzer/tokenizer (JapaneseTokenizer)
+
+           Kuromoji has a search mode (default) that does segmentation useful for search.  A heuristic
+           is used to segment compounds into their parts and the compound itself is kept as a synonym.
+
+           Valid values for attribute mode are:
+              normal: regular segmentation
+              search: segmentation useful for search with synonyms compounds (default)
+            extended: same as search mode, but unigrams unknown words (experimental)
+
+           For some applications it might be good to use search mode for indexing and normal mode for
+           queries to reduce recall and prevent parts of compounds from being matched and highlighted.
+           Use <analyzer type="index"> and <analyzer type="query"> for this and mode normal in query.
+
+           Kuromoji also has a convenient user dictionary feature that allows overriding the statistical
+           model with your own entries for segmentation, part-of-speech tags and readings without a need
+           to specify weights.  Notice that user dictionaries have not been subject to extensive testing.
+
+           User dictionary attributes are:
+                     userDictionary: user dictionary filename
+             userDictionaryEncoding: user dictionary encoding (default is UTF-8)
+
+           See lang/userdict_ja.txt for a sample user dictionary file.
+
+           Punctuation characters are discarded by default.  Use discardPunctuation="false" to keep them.
+
+           See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
+        -->
+        <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
+        <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
+        <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
+        <filter class="solr.JapaneseBaseFormFilterFactory"/>
+        <!-- Removes tokens with certain part-of-speech tags -->
+        <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" />
+        <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
+        <filter class="solr.CJKWidthFilterFactory"/>
+        <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking -->
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" />
+        <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->
+        <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
+        <!-- Lower-cases romaji characters -->
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+    
+    <!-- Latvian -->
+    <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" />
+        <filter class="solr.LatvianStemFilterFactory"/>
+      </analyzer>
+    </fieldType>
+    
+    <!-- Dutch -->
+    <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" />
+        <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
+        <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
+      </analyzer>
+    </fieldType>
+    
+    <!-- Norwegian -->
+    <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" />
+        <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
+        <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory"/> -->
+        <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> -->
+      </analyzer>
+    </fieldType>
+    
+    <!-- Portuguese -->
+    <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" />
+        <filter class="solr.PortugueseLightStemFilterFactory"/>
+        <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> -->
+        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> -->
+        <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> -->
+      </analyzer>
+    </fieldType>
+    
+    <!-- Romanian -->
+    <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" />
+        <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
+      </analyzer>
+    </fieldType>
+    
+    <!-- Russian -->
+    <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" />
+        <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
+        <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> -->
+      </analyzer>
+    </fieldType>
+    
+    <!-- Swedish -->
+    <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" />
+        <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
+        <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> -->
+      </analyzer>
+    </fieldType>
+    
+    <!-- Thai -->
+    <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.ThaiWordFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" />
+      </analyzer>
+    </fieldType>
+    
+    <!-- Turkish -->
+    <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.TurkishLowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" />
+        <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
+      </analyzer>
+    </fieldType>
+
+  </types>
+  
+  <!-- Similarity is the scoring routine for each document vs. a query.
+       A custom Similarity or SimilarityFactory may be specified here, but 
+       the default is fine for most applications.  
+       For more info: http://wiki.apache.org/solr/SchemaXml#Similarity
+    -->
+  <!--
+     <similarity class="com.example.solr.CustomSimilarityFactory">
+       <str name="paramkey">param value</str>
+     </similarity>
+    -->
+
+</schema>
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/solrconfig.xml b/zookeeper/example-schemaless/solr/collection1/conf/solrconfig.xml

new file mode 100644 (file)

index 0000000..d9c941f
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/solrconfig.xml
@@ -0,0 +1,1888 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- 
+     For more details about configurations options that may appear in
+     this file, see http://wiki.apache.org/solr/SolrConfigXml. 
+-->
+<config>
+  <!-- In all configuration below, a prefix of "solr." for class names
+       is an alias that causes solr to search appropriate packages,
+       including org.apache.solr.(search|update|request|core|analysis)
+
+       You may also specify a fully qualified Java classname if you
+       have your own custom plugins.
+    -->
+
+  <!-- Controls what version of Lucene various components of Solr
+       adhere to.  Generally, you want to use the latest version to
+       get all bug fixes and improvements. It is highly recommended
+       that you fully re-index after changing this setting as it can
+       affect both how text is indexed and queried.
+  -->
+  <luceneMatchVersion>4.4</luceneMatchVersion>
+
+  <!-- <lib/> directives can be used to instruct Solr to load any Jars
+       identified and use them to resolve any "plugins" specified in
+       your solrconfig.xml or schema.xml (ie: Analyzers, Request
+       Handlers, etc...).
+
+       All directories and paths are resolved relative to the
+       instanceDir.
+
+       Please note that <lib/> directives are processed in the order
+       that they appear in your solrconfig.xml file, and are "stacked" 
+       on top of each other when building a ClassLoader - so if you have 
+       plugin jars with dependencies on other jars, the "lower level" 
+       dependency jars should be loaded first.
+
+       If a "./lib" directory exists in your instanceDir, all files
+       found in it are included as if you had used the following
+       syntax...
+       
+              <lib dir="./lib" />
+    -->
+
+  <!-- A 'dir' option by itself adds any files found in the directory 
+       to the classpath, this is useful for including all jars in a
+       directory.
+
+       When a 'regex' is specified in addition to a 'dir', only the
+       files in that directory which completely match the regex
+       (anchored on both ends) will be included.
+
+       If a 'dir' option (with or without a regex) is used and nothing
+       is found that matches, a warning will be logged.
+
+       The examples below can be used to load some solr-contribs along 
+       with their external dependencies.
+    -->
+  <lib dir="../../../contrib/extraction/lib" regex=".*\.jar" />
+  <lib dir="../../../dist/" regex="solr-cell-\d.*\.jar" />
+
+  <lib dir="../../../contrib/clustering/lib/" regex=".*\.jar" />
+  <lib dir="../../../dist/" regex="solr-clustering-\d.*\.jar" />
+
+  <lib dir="../../../contrib/langid/lib/" regex=".*\.jar" />
+  <lib dir="../../../dist/" regex="solr-langid-\d.*\.jar" />
+
+  <lib dir="../../../contrib/velocity/lib" regex=".*\.jar" />
+  <lib dir="../../../dist/" regex="solr-velocity-\d.*\.jar" />
+
+  <!-- an exact 'path' can be used instead of a 'dir' to specify a 
+       specific jar file.  This will cause a serious error to be logged 
+       if it can't be loaded.
+    -->
+  <!--
+     <lib path="../a-jar-that-does-not-exist.jar" /> 
+  -->
+
+  <!-- Data Directory
+
+       Used to specify an alternate directory to hold all index data
+       other than the default ./data under the Solr home.  If
+       replication is in use, this should match the replication
+       configuration.
+    -->
+  <dataDir>${solr.data.dir:}</dataDir>
+
+
+  <!-- The DirectoryFactory to use for indexes.
+       
+       solr.StandardDirectoryFactory is filesystem
+       based and tries to pick the best implementation for the current
+       JVM and platform.  solr.NRTCachingDirectoryFactory, the default,
+       wraps solr.StandardDirectoryFactory and caches small files in memory
+       for better NRT performance.
+
+       One can force a particular implementation via solr.MMapDirectoryFactory,
+       solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory.
+
+       solr.RAMDirectoryFactory is memory based, not
+       persistent, and doesn't work with replication.
+    -->
+  <directoryFactory name="DirectoryFactory"
+                    class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
+
+  <!-- The CodecFactory for defining the format of the inverted index.
+       The default implementation is SchemaCodecFactory, which is the official Lucene
+       index format, but hooks into the schema to provide per-field customization of
+       the postings lists and per-document values in the fieldType element
+       (postingsFormat/docValuesFormat). Note that most of the alternative implementations
+       are experimental, so if you choose to customize the index format, it's a good
+       idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader)
+       before upgrading to a newer version to avoid unnecessary reindexing.
+  -->
+  <codecFactory class="solr.SchemaCodecFactory"/>
+
+  <!-- To disable dynamic schema REST APIs, use the following for <schemaFactory>:
+  
+       <schemaFactory class="ClassicIndexSchemaFactory"/>
+
+       When ManagedIndexSchemaFactory is specified instead, Solr will load the schema from
+       the resource named in 'managedSchemaResourceName', rather than from schema.xml.
+       Note that the managed schema resource CANNOT be named schema.xml.  If the managed
+       schema does not exist, Solr will create it after reading schema.xml, then rename
+       'schema.xml' to 'schema.xml.bak'. 
+       
+       Do NOT hand edit the managed schema - external modifications will be ignored and
+       overwritten as a result of schema modification REST API calls.
+
+       When ManagedIndexSchemaFactory is specified with mutable = true, schema
+       modification REST API calls will be allowed; otherwise, error responses will be
+       sent back for these requests. 
+  -->
+  <schemaFactory class="ManagedIndexSchemaFactory">
+    <bool name="mutable">true</bool>
+    <str name="managedSchemaResourceName">managed-schema</str>
+  </schemaFactory>
+
+  <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+       Index Config - These settings control low-level behavior of indexing
+       Most example settings here show the default value, but are commented
+       out, to more easily see where customizations have been made.
+       
+       Note: This replaces <indexDefaults> and <mainIndex> from older versions
+       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
+  <indexConfig>
+    <!-- maxFieldLength was removed in 4.0. To get similar behavior, include a 
+         LimitTokenCountFilterFactory in your fieldType definition. E.g. 
+     <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/>
+    -->
+    <!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 -->
+    <!-- <writeLockTimeout>1000</writeLockTimeout>  -->
+
+    <!-- The maximum number of simultaneous threads that may be
+         indexing documents at once in IndexWriter; if more than this
+         many threads arrive they will wait for others to finish.
+         Default in Solr/Lucene is 8. -->
+    <!-- <maxIndexingThreads>8</maxIndexingThreads>  -->
+
+    <!-- Expert: Enabling compound file will use fewer files for the index,
+         and therefore fewer file descriptors, at the expense of performance.
+         Default in Lucene is "true". Default in Solr is "false" (since 3.6) -->
+    <!-- <useCompoundFile>false</useCompoundFile> -->
+
+    <!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene
+         indexing for buffering added documents and deletions before they are
+         flushed to the Directory.
+         maxBufferedDocs sets a limit on the number of documents buffered
+         before flushing.
+         If both ramBufferSizeMB and maxBufferedDocs are set, then
+         Lucene will flush based on whichever limit is hit first.  -->
+    <!-- <ramBufferSizeMB>100</ramBufferSizeMB> -->
+    <!-- <maxBufferedDocs>1000</maxBufferedDocs> -->
+
+    <!-- Expert: Merge Policy 
+         The Merge Policy in Lucene controls how merging of segments is done.
+         The default since Solr/Lucene 3.3 is TieredMergePolicy.
+         The default since Lucene 2.3 was the LogByteSizeMergePolicy,
+         Even older versions of Lucene used LogDocMergePolicy.
+      -->
+    <!--
+        <mergePolicy class="org.apache.lucene.index.TieredMergePolicy">
+          <int name="maxMergeAtOnce">10</int>
+          <int name="segmentsPerTier">10</int>
+        </mergePolicy>
+      -->
+
+    <!-- Merge Factor
+         The merge factor controls how many segments will get merged at a time.
+         For TieredMergePolicy, mergeFactor is a convenience parameter which
+         will set both MaxMergeAtOnce and SegmentsPerTier at once.
+         For LogByteSizeMergePolicy, mergeFactor decides how many new segments
+         will be allowed before they are merged into one.
+         Default is 10 for both merge policies.
+      -->
+    <!-- 
+    <mergeFactor>10</mergeFactor>
+      -->
+
+    <!-- Expert: Merge Scheduler
+         The Merge Scheduler in Lucene controls how merges are
+         performed.  The ConcurrentMergeScheduler (Lucene 2.3 default)
+         can perform merges in the background using separate threads.
+         The SerialMergeScheduler (Lucene 2.2 default) does not.
+     -->
+    <!-- 
+       <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>
+       -->
+
+    <!-- LockFactory 
+
+         This option specifies which Lucene LockFactory implementation
+         to use.
+      
+         single = SingleInstanceLockFactory - suggested for a
+                  read-only index or when there is no possibility of
+                  another process trying to modify the index.
+         native = NativeFSLockFactory - uses OS native file locking.
+                  Do not use when multiple solr webapps in the same
+                  JVM are attempting to share a single index.
+         simple = SimpleFSLockFactory  - uses a plain file for locking
+
+         Defaults: 'native' is default for Solr3.6 and later, otherwise
+                   'simple' is the default
+
+         More details on the nuances of each LockFactory...
+         http://wiki.apache.org/lucene-java/AvailableLockFactories
+    -->
+    <lockType>${solr.lock.type:native}</lockType>
+
+    <!-- Unlock On Startup
+
+         If true, unlock any held write or commit locks on startup.
+         This defeats the locking mechanism that allows multiple
+         processes to safely access a lucene index, and should be used
+         with care. Default is "false".
+
+         This is not needed if lock type is 'single'
+     -->
+    <!--
+    <unlockOnStartup>false</unlockOnStartup>
+      -->
+
+    <!-- Expert: Controls how often Lucene loads terms into memory
+         Default is 128 and is likely good for most everyone.
+      -->
+    <!-- <termIndexInterval>128</termIndexInterval> -->
+
+    <!-- If true, IndexReaders will be reopened (often more efficient)
+         instead of closed and then opened. Default: true
+      -->
+    <!-- 
+    <reopenReaders>true</reopenReaders>
+      -->
+
+    <!-- Commit Deletion Policy
+         Custom deletion policies can be specified here. The class must
+         implement org.apache.lucene.index.IndexDeletionPolicy.
+
+         The default Solr IndexDeletionPolicy implementation supports
+         deleting index commit points on number of commits, age of
+         commit point and optimized status.
+         
+         The latest commit point should always be preserved regardless
+         of the criteria.
+    -->
+    <!-- 
+    <deletionPolicy class="solr.SolrDeletionPolicy">
+    -->
+    <!-- The number of commit points to be kept -->
+    <!-- <str name="maxCommitsToKeep">1</str> -->
+    <!-- The number of optimized commit points to be kept -->
+    <!-- <str name="maxOptimizedCommitsToKeep">0</str> -->
+    <!--
+        Delete all commit points once they have reached the given age.
+        Supports DateMathParser syntax e.g.
+      -->
+    <!--
+       <str name="maxCommitAge">30MINUTES</str>
+       <str name="maxCommitAge">1DAY</str>
+    -->
+    <!-- 
+    </deletionPolicy>
+    -->
+
+    <!-- Lucene Infostream
+       
+         To aid in advanced debugging, Lucene provides an "InfoStream"
+         of detailed information when indexing.
+
+         Setting the value to true will instruct the underlying Lucene
+         IndexWriter to write its debugging info to the specified file
+      -->
+    <!-- <infoStream file="INFOSTREAM.txt">false</infoStream> -->
+  </indexConfig>
+
+
+  <!-- JMX
+       
+       This example enables JMX if and only if an existing MBeanServer
+       is found, use this if you want to configure JMX through JVM
+       parameters. Remove this to disable exposing Solr configuration
+       and statistics to JMX.
+
+       For more details see http://wiki.apache.org/solr/SolrJmx
+    -->
+  <jmx />
+  <!-- If you want to connect to a particular server, specify the
+       agentId 
+    -->
+  <!-- <jmx agentId="myAgent" /> -->
+  <!-- If you want to start a new MBeanServer, specify the serviceUrl -->
+  <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
+    -->
+
+  <!-- The default high-performance update handler -->
+  <updateHandler class="solr.DirectUpdateHandler2">
+
+    <!-- Enables a transaction log, used for real-time get, durability,
+         and solr cloud replica recovery.  The log can grow as big as
+         uncommitted changes to the index, so use of a hard autoCommit
+         is recommended (see below).
+         "dir" - the target directory for transaction logs, defaults to the
+                solr data directory.  -->
+    <updateLog>
+      <str name="dir">${solr.ulog.dir:}</str>
+    </updateLog>
+
+    <!-- AutoCommit
+
+         Perform a hard commit automatically under certain conditions.
+         Instead of enabling autoCommit, consider using "commitWithin"
+         when adding documents. 
+
+         http://wiki.apache.org/solr/UpdateXmlMessages
+
+         maxDocs - Maximum number of documents to add since the last
+                   commit before automatically triggering a new commit.
+
+         maxTime - Maximum amount of time in ms that is allowed to pass
+                   since a document was added before automatically
+                   triggering a new commit. 
+         openSearcher - if false, the commit causes recent index changes
+           to be flushed to stable storage, but does not cause a new
+           searcher to be opened to make those changes visible.
+
+         If the updateLog is enabled, then it's highly recommended to
+         have some sort of hard autoCommit to limit the log size.
+      -->
+    <autoCommit>
+      <maxTime>15000</maxTime>
+      <openSearcher>false</openSearcher>
+    </autoCommit>
+
+    <!-- softAutoCommit is like autoCommit except it causes a
+         'soft' commit which only ensures that changes are visible
+         but does not ensure that data is synced to disk.  This is
+         faster and more near-realtime friendly than a hard commit.
+      -->
+    <!--
+      <autoSoftCommit> 
+        <maxTime>1000</maxTime> 
+      </autoSoftCommit>
+     -->
+
+    <!-- Update Related Event Listeners
+         
+         Various IndexWriter related events can trigger Listeners to
+         take actions.
+
+         postCommit - fired after every commit or optimize command
+         postOptimize - fired after every optimize command
+      -->
+    <!-- The RunExecutableListener executes an external command from a
+         hook such as postCommit or postOptimize.
+         
+         exe - the name of the executable to run
+         dir - dir to use as the current working directory. (default=".")
+         wait - the calling thread waits until the executable returns. 
+                (default="true")
+         args - the arguments to pass to the program.  (default is none)
+         env - environment variables to set.  (default is none)
+      -->
+    <!-- This example shows how RunExecutableListener could be used
+         with the script based replication...
+         http://wiki.apache.org/solr/CollectionDistribution
+      -->
+    <!--
+       <listener event="postCommit" class="solr.RunExecutableListener">
+         <str name="exe">solr/bin/snapshooter</str>
+         <str name="dir">.</str>
+         <bool name="wait">true</bool>
+         <arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
+         <arr name="env"> <str>MYVAR=val1</str> </arr>
+       </listener>
+      -->
+
+  </updateHandler>
+
+  <!-- IndexReaderFactory
+
+       Use the following format to specify a custom IndexReaderFactory,
+       which allows for alternate IndexReader implementations.
+
+       ** Experimental Feature **
+
+       Please note - Using a custom IndexReaderFactory may prevent
+       certain other features from working. The API to
+       IndexReaderFactory may change without warning or may even be
+       removed from future releases if the problems cannot be
+       resolved.
+
+
+       ** Features that may not work with custom IndexReaderFactory **
+
+       The ReplicationHandler assumes a disk-resident index. Using a
+       custom IndexReader implementation may cause incompatibility
+       with ReplicationHandler and may cause replication to not work
+       correctly. See SOLR-1366 for details.
+
+    -->
+  <!--
+  <indexReaderFactory name="IndexReaderFactory" class="package.class">
+    <str name="someArg">Some Value</str>
+  </indexReaderFactory >
+  -->
+  <!-- By explicitly declaring the Factory, the termIndexDivisor can
+       be specified.
+    -->
+  <!--
+     <indexReaderFactory name="IndexReaderFactory" 
+                         class="solr.StandardIndexReaderFactory">
+       <int name="setTermIndexDivisor">12</int>
+     </indexReaderFactory >
+    -->
+
+  <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+       Query section - these settings control query time things like caches
+       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
+  <query>
+    <!-- Max Boolean Clauses
+
+         Maximum number of clauses in each BooleanQuery,  an exception
+         is thrown if exceeded.
+
+         ** WARNING **
+         
+         This option actually modifies a global Lucene property that
+         will affect all SolrCores.  If multiple solrconfig.xml files
+         disagree on this property, the value at any given moment will
+         be based on the last SolrCore to be initialized.
+         
+      -->
+    <maxBooleanClauses>1024</maxBooleanClauses>
+
+
+    <!-- Solr Internal Query Caches
+
+         There are two implementations of cache available for Solr,
+         LRUCache, based on a synchronized LinkedHashMap, and
+         FastLRUCache, based on a ConcurrentHashMap.  
+
+         FastLRUCache has faster gets and slower puts in single
+         threaded operation and thus is generally faster than LRUCache
+         when the hit ratio of the cache is high (> 75%), and may be
+         faster under other scenarios on multi-cpu systems.
+    -->
+
+    <!-- Filter Cache
+
+         Cache used by SolrIndexSearcher for filters (DocSets),
+         unordered sets of *all* documents that match a query.  When a
+         new searcher is opened, its caches may be prepopulated or
+         "autowarmed" using data from caches in the old searcher.
+         autowarmCount is the number of items to prepopulate.  For
+         LRUCache, the autowarmed items will be the most recently
+         accessed items.
+
+         Parameters:
+           class - the SolrCache implementation to use
+               (LRUCache or FastLRUCache)
+           size - the maximum number of entries in the cache
+           initialSize - the initial capacity (number of entries) of
+               the cache.  (see java.util.HashMap)
+           autowarmCount - the number of entries to prepopulate from
+               an old cache.
+      -->
+    <filterCache class="solr.FastLRUCache"
+                 size="512"
+                 initialSize="512"
+                 autowarmCount="0"/>
+
+    <!-- Query Result Cache
+         
+         Caches results of searches - ordered lists of document ids
+         (DocList) based on a query, a sort, and the range of documents requested.  
+      -->
+    <queryResultCache class="solr.LRUCache"
+                      size="512"
+                      initialSize="512"
+                      autowarmCount="0"/>
+
+    <!-- Document Cache
+
+         Caches Lucene Document objects (the stored fields for each
+         document).  Since Lucene internal document ids are transient,
+         this cache will not be autowarmed.  
+      -->
+    <documentCache class="solr.LRUCache"
+                   size="512"
+                   initialSize="512"
+                   autowarmCount="0"/>
+
+    <!-- Field Value Cache
+         
+         Cache used to hold field values that are quickly accessible
+         by document id.  The fieldValueCache is created by default
+         even if not configured here.
+      -->
+    <!--
+       <fieldValueCache class="solr.FastLRUCache"
+                        size="512"
+                        autowarmCount="128"
+                        showItems="32" />
+      -->
+
+    <!-- Custom Cache
+
+         Example of a generic cache.  These caches may be accessed by
+         name through SolrIndexSearcher.getCache(),cacheLookup(), and
+         cacheInsert().  The purpose is to enable easy caching of
+         user/application level data.  The regenerator argument should
+         be specified as an implementation of solr.CacheRegenerator 
+         if autowarming is desired.  
+      -->
+    <!--
+       <cache name="myUserCache"
+              class="solr.LRUCache"
+              size="4096"
+              initialSize="1024"
+              autowarmCount="1024"
+              regenerator="com.mycompany.MyRegenerator"
+              />
+      -->
+
+
+    <!-- Lazy Field Loading
+
+         If true, stored fields that are not requested will be loaded
+         lazily.  This can result in a significant speed improvement
+         if the usual case is to not load all stored fields,
+         especially if the skipped fields are large compressed text
+         fields.
+    -->
+    <enableLazyFieldLoading>true</enableLazyFieldLoading>
+
+    <!-- Use Filter For Sorted Query
+ 
+         A possible optimization that attempts to use a filter to
+         satisfy a search.  If the requested sort does not include
+         score, then the filterCache will be checked for a filter
+         matching the query. If found, the filter will be used as the
+         source of document ids, and then the sort will be applied to
+         that.
+ 
+         For most situations, this will not be useful unless you
+         frequently get the same search repeatedly with different sort
+         options, and none of them ever use "score"
+      -->
+    <!--
+       <useFilterForSortedQuery>true</useFilterForSortedQuery>
+      -->
+
+    <!-- Result Window Size
+ 
+         An optimization for use with the queryResultCache.  When a search
+         is requested, a superset of the requested number of document ids
+         are collected.  For example, if a search for a particular query
+         requests matching documents 10 through 19, and queryResultWindowSize is 50,
+         then documents 0 through 49 will be collected and cached.  Any further
+         requests in that range can be satisfied via the cache.  
+      -->
+    <queryResultWindowSize>20</queryResultWindowSize>
+
+    <!-- Maximum number of documents to cache for any entry in the
+         queryResultCache. 
+      -->
+    <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
+
+    <!-- Query Related Event Listeners
+ 
+         Various IndexSearcher related events can trigger Listeners to
+         take actions.
+ 
+         newSearcher - fired whenever a new searcher is being prepared
+         and there is a current searcher handling requests (aka
+         registered).  It can be used to prime certain caches to
+         prevent long request times for certain requests.
+ 
+         firstSearcher - fired whenever a new searcher is being
+         prepared but there is no current registered searcher to handle
+         requests or to gain autowarming data from.
+ 
+         
+      -->
+    <!-- QuerySenderListener takes an array of NamedList and executes a
+         local query request for each NamedList in sequence. 
+      -->
+    <listener event="newSearcher" class="solr.QuerySenderListener">
+      <arr name="queries">
+        <!--
+           <lst><str name="q">solr</str><str name="sort">price asc</str></lst>
+           <lst><str name="q">rocks</str><str name="sort">weight asc</str></lst>
+          -->
+      </arr>
+    </listener>
+    <listener event="firstSearcher" class="solr.QuerySenderListener">
+      <arr name="queries">
+        <!--
+        <lst>
+          <str name="q">static firstSearcher warming in solrconfig.xml</str>
+        </lst>
+        -->
+      </arr>
+    </listener>
+
+    <!-- Use Cold Searcher
+
+         If a search request comes in and there is no current
+         registered searcher, then immediately register the still
+         warming searcher and use it.  If "false" then all requests
+         will block until the first searcher is done warming.
+      -->
+    <useColdSearcher>false</useColdSearcher>
+
+    <!-- Max Warming Searchers
+         
+         Maximum number of searchers that may be warming in the
+         background concurrently.  An error is returned if this limit
+         is exceeded.
+
+         Recommend values of 1-2 for read-only slaves, higher for
+         masters w/o cache warming.
+      -->
+    <maxWarmingSearchers>2</maxWarmingSearchers>
+
+  </query>
+
+
+  <!-- Request Dispatcher
+
+       This section contains instructions for how the SolrDispatchFilter
+       should behave when processing requests for this SolrCore.
+
+       handleSelect is a legacy option that affects the behavior of requests
+       such as /select?qt=XXX
+
+       handleSelect="true" will cause the SolrDispatchFilter to process
+       the request and dispatch the query to a handler specified by the 
+       "qt" param, assuming "/select" isn't already registered.
+
+       handleSelect="false" will cause the SolrDispatchFilter to
+       ignore "/select" requests, resulting in a 404 unless a handler
+       is explicitly registered with the name "/select"
+
+       handleSelect="true" is not recommended for new users, but is the default
+       for backwards compatibility
+    -->
+  <requestDispatcher handleSelect="false" >
+    <!-- Request Parsing
+
+         These settings indicate how Solr Requests may be parsed, and
+         what restrictions may be placed on the ContentStreams from
+         those requests
+
+         enableRemoteStreaming - enables use of the stream.file
+         and stream.url parameters for specifying remote streams.
+
+         multipartUploadLimitInKB - specifies the max size (in KiB) of
+         Multipart File Uploads that Solr will allow in a Request.
+         
+         formdataUploadLimitInKB - specifies the max size (in KiB) of
+         form data (application/x-www-form-urlencoded) sent via
+         POST. You can use POST to pass request parameters not
+         fitting into the URL.
+         
+         addHttpRequestToContext - if set to true, it will instruct
+         the requestParsers to include the original HttpServletRequest
+         object in the context map of the SolrQueryRequest under the 
+         key "httpRequest". It will not be used by any of the existing
+         Solr components, but may be useful when developing custom 
+         plugins.
+         
+         *** WARNING ***
+         The settings below authorize Solr to fetch remote files. You
+         should make sure your system has some authentication before
+         using enableRemoteStreaming="true"
+
+      -->
+    <requestParsers enableRemoteStreaming="true"
+                    multipartUploadLimitInKB="2048000"
+                    formdataUploadLimitInKB="2048"
+                    addHttpRequestToContext="false"/>
+
+    <!-- HTTP Caching
+
+         Set HTTP caching related parameters (for proxy caches and clients).
+
+         The options below instruct Solr not to output any HTTP Caching
+         related headers
+      -->
+    <httpCaching never304="true" />
+    <!-- If you include a <cacheControl> directive, it will be used to
+         generate a Cache-Control header (as well as an Expires header
+         if the value contains "max-age=")
+         
+         By default, no Cache-Control header is generated.
+         
+         You can use the <cacheControl> option even if you have set
+         never304="true"
+      -->
+    <!--
+       <httpCaching never304="true" >
+         <cacheControl>max-age=30, public</cacheControl> 
+       </httpCaching>
+      -->
+    <!-- To enable Solr to respond with automatically generated HTTP
+         Caching headers, and to respond to Cache Validation requests
+         correctly, set the value of never304="false"
+         
+         This will cause Solr to generate Last-Modified and ETag
+         headers based on the properties of the Index.
+
+         The following options can also be specified to affect the
+         values of these headers...
+
+         lastModFrom - the default value is "openTime" which means the
+         Last-Modified value (and validation against If-Modified-Since
+         requests) will all be relative to when the current Searcher
+         was opened.  You can change it to lastModFrom="dirLastMod" if
+         you want the value to exactly correspond to when the physical
+         index was last modified.
+
+         etagSeed="..." is an option you can change to force the ETag
+         header (and validation against If-None-Match requests) to be
+         different even if the index has not changed (ie: when making
+         significant changes to your config file)
+
+         (lastModFrom and etagSeed are both ignored if you use
+         the never304="true" option)
+      -->
+    <!--
+       <httpCaching lastModFrom="openTime"
+                    etagSeed="Solr">
+         <cacheControl>max-age=30, public</cacheControl> 
+       </httpCaching>
+      -->
+  </requestDispatcher>
+
+  <!-- Request Handlers 
+
+       http://wiki.apache.org/solr/SolrRequestHandler
+
+       Incoming queries will be dispatched to a specific handler by name
+       based on the path specified in the request.
+
+       Legacy behavior: If the request path uses "/select" but no Request
+       Handler has that name, and if handleSelect="true" has been specified in
+       the requestDispatcher, then the Request Handler is dispatched based on
+       the qt parameter.  Handlers without a leading '/' are accessed this way
+       like so: http://host/app/[core/]select?qt=name  If no qt is
+       given, then the requestHandler that declares default="true" will be
+       used or the one named "standard".
+
+       If a Request Handler is declared with startup="lazy", then it will
+       not be initialized until the first request that uses it.
+
+    -->
+  <!-- SearchHandler
+
+       http://wiki.apache.org/solr/SearchHandler
+
+       For processing Search Queries, the primary Request Handler
+       provided with Solr is "SearchHandler". It delegates to a sequence
+       of SearchComponents (see below) and supports distributed
+       queries across multiple shards.
+    -->
+  <requestHandler name="/select" class="solr.SearchHandler">
+    <!-- default values for query parameters can be specified, these
+         will be overridden by parameters in the request
+      -->
+    <lst name="defaults">
+      <str name="echoParams">explicit</str>
+      <int name="rows">10</int>
+      <!-- <str name="df">text</str> -->
+    </lst>
+    <!-- In addition to defaults, "appends" params can be specified
+         to identify values which should be appended to the list of
+         multi-val params from the query (or the existing "defaults").
+      -->
+    <!-- In this example, the param "fq=inStock:true" would be appended to
+         any query time fq params the user may specify, as a mechanism for
+         partitioning the index, independent of any user selected filtering
+         that may also be desired (perhaps as a result of faceted searching).
+
+         NOTE: there is *absolutely* nothing a client can do to prevent these
+         "appends" values from being used, so don't use this mechanism
+         unless you are sure you always want it.
+      -->
+    <!--
+       <lst name="appends">
+         <str name="fq">inStock:true</str>
+       </lst>
+      -->
+    <!-- "invariants" are a way of letting the Solr maintainer lock down
+         the options available to Solr clients.  Any params values
+         specified here are used regardless of what values may be specified
+         in either the query, the "defaults", or the "appends" params.
+
+         In this example, the facet.field and facet.query params would
+         be fixed, limiting the facets clients can use.  Faceting is
+         not turned on by default - but if the client does specify
+         facet=true in the request, these are the only facets they
+         will be able to see counts for; regardless of what other
+         facet.field or facet.query params they may specify.
+
+         NOTE: there is *absolutely* nothing a client can do to prevent these
+         "invariants" values from being used, so don't use this mechanism
+         unless you are sure you always want it.
+      -->
+    <!--
+       <lst name="invariants">
+         <str name="facet.field">cat</str>
+         <str name="facet.field">manu_exact</str>
+         <str name="facet.query">price:[* TO 500]</str>
+         <str name="facet.query">price:[500 TO *]</str>
+       </lst>
+      -->
+    <!-- If the default list of SearchComponents is not desired, that
+         list can either be overridden completely, or components can be
+         prepended or appended to the default list.  (see below)
+      -->
+    <!--
+       <arr name="components">
+         <str>nameOfCustomComponent1</str>
+         <str>nameOfCustomComponent2</str>
+       </arr>
+      -->
+  </requestHandler>
+
+  <!-- A request handler that returns indented JSON by default -->
+  <requestHandler name="/query" class="solr.SearchHandler">
+    <lst name="defaults">
+      <str name="echoParams">explicit</str>
+      <str name="wt">json</str>
+      <str name="indent">true</str>
+      <str name="df">text</str>
+    </lst>
+  </requestHandler>
+
+
+  <!-- realtime get handler, guaranteed to return the latest stored fields of
+       any document, without the need to commit or open a new searcher.  The
+       current implementation relies on the updateLog feature being enabled. -->
+  <requestHandler name="/get" class="solr.RealTimeGetHandler">
+    <lst name="defaults">
+      <str name="omitHeader">true</str>
+      <str name="wt">json</str>
+      <str name="indent">true</str>
+    </lst>
+  </requestHandler>
+
+
+  <!-- A Robust Example 
+       
+       This example SearchHandler declaration shows off usage of the
+       SearchHandler with many defaults declared
+
+       Note that the same Request Handler class (SearchHandler)
+       can be registered multiple times with different
+       names (and different init parameters)
+    -->
+  <requestHandler name="/browse" class="solr.SearchHandler">
+    <lst name="defaults">
+      <str name="echoParams">explicit</str>
+
+      <!-- VelocityResponseWriter settings -->
+      <str name="wt">velocity</str>
+      <str name="v.template">browse</str>
+      <str name="v.layout">layout</str>
+      <str name="title">Solritas</str>
+
+      <!-- Query settings -->
+      <str name="defType">edismax</str>
+      <str name="qf">
+        text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+        title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
+      </str>
+      <str name="df">text</str>
+      <str name="mm">100%</str>
+      <str name="q.alt">*:*</str>
+      <str name="rows">10</str>
+      <str name="fl">*,score</str>
+
+      <str name="mlt.qf">
+        text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+        title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
+      </str>
+      <str name="mlt.fl">text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename</str>
+      <int name="mlt.count">3</int>
+
+      <!-- Faceting defaults -->
+      <str name="facet">on</str>
+      <str name="facet.field">cat</str>
+      <str name="facet.field">manu_exact</str>
+      <str name="facet.field">content_type</str>
+      <str name="facet.field">author_s</str>
+      <str name="facet.query">ipod</str>
+      <str name="facet.query">GB</str>
+      <str name="facet.mincount">1</str>
+      <str name="facet.pivot">cat,inStock</str>
+      <str name="facet.range.other">after</str>
+      <str name="facet.range">price</str>
+      <int name="f.price.facet.range.start">0</int>
+      <int name="f.price.facet.range.end">600</int>
+      <int name="f.price.facet.range.gap">50</int>
+      <str name="facet.range">popularity</str>
+      <int name="f.popularity.facet.range.start">0</int>
+      <int name="f.popularity.facet.range.end">10</int>
+      <int name="f.popularity.facet.range.gap">3</int>
+      <str name="facet.range">manufacturedate_dt</str>
+      <str name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str>
+      <str name="f.manufacturedate_dt.facet.range.end">NOW</str>
+      <str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str>
+      <str name="f.manufacturedate_dt.facet.range.other">before</str>
+      <str name="f.manufacturedate_dt.facet.range.other">after</str>
+
+      <!-- Highlighting defaults -->
+      <str name="hl">on</str>
+      <str name="hl.fl">content features title name</str>
+      <str name="hl.encoder">html</str>
+      <str name="hl.simple.pre">&lt;b&gt;</str>
+      <str name="hl.simple.post">&lt;/b&gt;</str>
+      <str name="f.title.hl.fragsize">0</str>
+      <str name="f.title.hl.alternateField">title</str>
+      <str name="f.name.hl.fragsize">0</str>
+      <str name="f.name.hl.alternateField">name</str>
+      <str name="f.content.hl.snippets">3</str>
+      <str name="f.content.hl.fragsize">200</str>
+      <str name="f.content.hl.alternateField">content</str>
+      <str name="f.content.hl.maxAlternateFieldLength">750</str>
+
+      <!-- Spell checking defaults -->
+      <str name="spellcheck">on</str>
+      <str name="spellcheck.extendedResults">false</str>
+      <str name="spellcheck.count">5</str>
+      <str name="spellcheck.alternativeTermCount">2</str>
+      <str name="spellcheck.maxResultsForSuggest">5</str>
+      <str name="spellcheck.collate">true</str>
+      <str name="spellcheck.collateExtendedResults">true</str>
+      <str name="spellcheck.maxCollationTries">5</str>
+      <str name="spellcheck.maxCollations">3</str>
+    </lst>
+
+    <!-- append spellchecking to our list of components -->
+    <arr name="last-components">
+      <str>spellcheck</str>
+    </arr>
+  </requestHandler>
+
+
+  <!-- Update Request Handler.  
+       
+       http://wiki.apache.org/solr/UpdateXmlMessages
+
+       The canonical Request Handler for Modifying the Index through
+       commands specified using XML, JSON, CSV, or JAVABIN
+
+       Note: Since Solr 1.1, requestHandlers require a valid content
+       type header if posted in the body. For example, curl now
+       requires: -H 'Content-type:text/xml; charset=utf-8'
+       
+       To override the request content type and force a specific 
+       Content-type, use the request parameter: 
+         ?update.contentType=text/csv
+       
+       This handler will pick a response format to match the input
+       if the 'wt' parameter is not explicit
+    -->
+  <requestHandler name="/update" class="solr.UpdateRequestHandler">
+    <!-- See below for information on defining 
+         updateRequestProcessorChains that can be used by name 
+         on each Update Request
+      -->
+    <lst name="defaults">
+      <str name="update.chain">add-unknown-fields-to-the-schema</str>
+    </lst>
+  </requestHandler>
+
+  <!-- for back compat with clients using /update/json and /update/csv -->
+  <requestHandler name="/update/json" class="solr.JsonUpdateRequestHandler">
+    <lst name="defaults">
+      <str name="stream.contentType">application/json</str>
+      <str name="update.chain">add-unknown-fields-to-the-schema</str>
+    </lst>
+  </requestHandler>
+  <requestHandler name="/update/csv" class="solr.CSVRequestHandler">
+    <lst name="defaults">
+      <str name="stream.contentType">application/csv</str>
+      <str name="update.chain">add-unknown-fields-to-the-schema</str>
+    </lst>
+  </requestHandler>
+
+  <!-- Solr Cell Update Request Handler
+
+       http://wiki.apache.org/solr/ExtractingRequestHandler 
+
+    -->
+  <requestHandler name="/update/extract"
+                  startup="lazy"
+                  class="solr.extraction.ExtractingRequestHandler" >
+    <lst name="defaults">
+      <str name="lowernames">true</str>
+      <str name="uprefix">ignored_</str>
+
+      <!-- capture link hrefs but ignore div attributes -->
+      <str name="captureAttr">true</str>
+      <str name="fmap.a">links</str>
+      <str name="fmap.div">ignored_</str>
+    </lst>
+  </requestHandler>
+
+
+  <!-- Field Analysis Request Handler
+
+       RequestHandler that provides much the same functionality as
+       analysis.jsp. Provides the ability to specify multiple field
+       types and field names in the same request and outputs
+       index-time and query-time analysis for each of them.
+
+       Request parameters are:
+       analysis.fieldname - field name whose analyzers are to be used
+
+       analysis.fieldtype - field type whose analyzers are to be used
+       analysis.fieldvalue - text for index-time analysis
+       q (or analysis.q) - text for query time analysis
+       analysis.showmatch (true|false) - When set to true and when
+           query analysis is performed, the produced tokens of the
+           field value analysis will be marked as "matched" for every
+           token that is produced by the query analysis
+   -->
+  <requestHandler name="/analysis/field"
+                  startup="lazy"
+                  class="solr.FieldAnalysisRequestHandler" />
+
+
+  <!-- Document Analysis Handler
+
+       http://wiki.apache.org/solr/AnalysisRequestHandler
+
+       An analysis handler that provides a breakdown of the analysis
+       process of provided documents. This handler expects a (single)
+       content stream with the following format:
+
+       <docs>
+         <doc>
+           <field name="id">1</field>
+           <field name="name">The Name</field>
+           <field name="text">The Text Value</field>
+         </doc>
+         <doc>...</doc>
+         <doc>...</doc>
+         ...
+       </docs>
+
+    Note: Each document must contain a field which serves as the
+    unique key. This key is used in the returned response to associate
+    an analysis breakdown to the analyzed document.
+
+    Like the FieldAnalysisRequestHandler, this handler also supports
+    query analysis by sending either an "analysis.query" or "q"
+    request parameter that holds the query text to be analyzed. It
+    also supports the "analysis.showmatch" parameter; when it is set to
+    true, all field tokens that match the query tokens will be marked
+    as a "match".
+  -->
+  <requestHandler name="/analysis/document"
+                  class="solr.DocumentAnalysisRequestHandler"
+                  startup="lazy" />
+
+  <!-- Admin Handlers
+
+       Admin Handlers - This will register all the standard admin
+       RequestHandlers.  
+    -->
+  <requestHandler name="/admin/"
+                  class="solr.admin.AdminHandlers" />
+  <!-- This single handler is equivalent to the following... -->
+  <!--
+     <requestHandler name="/admin/luke"       class="solr.admin.LukeRequestHandler" />
+     <requestHandler name="/admin/system"     class="solr.admin.SystemInfoHandler" />
+     <requestHandler name="/admin/plugins"    class="solr.admin.PluginInfoHandler" />
+     <requestHandler name="/admin/threads"    class="solr.admin.ThreadDumpHandler" />
+     <requestHandler name="/admin/properties" class="solr.admin.PropertiesRequestHandler" />
+     <requestHandler name="/admin/file"       class="solr.admin.ShowFileRequestHandler" />
+    -->
+  <!-- If you wish to hide files under ${solr.home}/conf, explicitly
+       register the ShowFileRequestHandler using: 
+    -->
+  <!--
+     <requestHandler name="/admin/file" 
+                     class="solr.admin.ShowFileRequestHandler" >
+       <lst name="invariants">
+         <str name="hidden">synonyms.txt</str> 
+         <str name="hidden">anotherfile.txt</str> 
+       </lst>
+     </requestHandler>
+    -->
+
+  <!-- ping/healthcheck -->
+  <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
+    <lst name="invariants">
+      <str name="q">solrpingquery</str>
+    </lst>
+    <lst name="defaults">
+      <str name="echoParams">all</str>
+    </lst>
+    <!-- An optional feature of the PingRequestHandler is to configure the 
+         handler with a "healthcheckFile" which can be used to enable/disable 
+         the PingRequestHandler.
+         relative paths are resolved against the data dir 
+      -->
+    <!-- <str name="healthcheckFile">server-enabled.txt</str> -->
+  </requestHandler>
+
+  <!-- Echo the request contents back to the client -->
+  <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
+    <lst name="defaults">
+      <str name="echoParams">explicit</str>
+      <str name="echoHandler">true</str>
+    </lst>
+  </requestHandler>
+
+  <!-- Solr Replication
+
+       The SolrReplicationHandler supports replicating indexes from a
+       "master" used for indexing and "slaves" used for queries.
+
+       http://wiki.apache.org/solr/SolrReplication 
+
+       It is also necessary for SolrCloud to function (in Cloud mode, the
+       replication handler is used to bulk transfer segments when nodes 
+       are added or need to recover).
+
+       https://wiki.apache.org/solr/SolrCloud/
+    -->
+  <requestHandler name="/replication" class="solr.ReplicationHandler" >
+    <!--
+       To enable simple master/slave replication, uncomment one of the 
+       sections below, depending on whether this solr instance should be
+       the "master" or a "slave".  If this instance is a "slave" you will 
+       also need to fill in the masterUrl to point to a real machine.
+    -->
+    <!--
+       <lst name="master">
+         <str name="replicateAfter">commit</str>
+         <str name="replicateAfter">startup</str>
+         <str name="confFiles">schema.xml,stopwords.txt</str>
+       </lst>
+    -->
+    <!--
+       <lst name="slave">
+         <str name="masterUrl">http://your-master-hostname:8983/solr</str>
+         <str name="pollInterval">00:00:60</str>
+       </lst>
+    -->
+  </requestHandler>
+
+  <!-- Search Components
+
+       Search components are registered to SolrCore and used by 
+       instances of SearchHandler (which can access them by name)
+       
+       By default, the following components are available:
+       
+       <searchComponent name="query"     class="solr.QueryComponent" />
+       <searchComponent name="facet"     class="solr.FacetComponent" />
+       <searchComponent name="mlt"       class="solr.MoreLikeThisComponent" />
+       <searchComponent name="highlight" class="solr.HighlightComponent" />
+       <searchComponent name="stats"     class="solr.StatsComponent" />
+       <searchComponent name="debug"     class="solr.DebugComponent" />
+   
+       Default configuration in a requestHandler would look like:
+
+       <arr name="components">
+         <str>query</str>
+         <str>facet</str>
+         <str>mlt</str>
+         <str>highlight</str>
+         <str>stats</str>
+         <str>debug</str>
+       </arr>
+
+       If you register a searchComponent to one of the standard names, 
+       that will be used instead of the default.
+
+       To insert components before or after the 'standard' components, use:
+    
+       <arr name="first-components">
+         <str>myFirstComponentName</str>
+       </arr>
+    
+       <arr name="last-components">
+         <str>myLastComponentName</str>
+       </arr>
+
+       NOTE: The component registered with the name "debug" will
+       always be executed after the "last-components" 
+       
+     -->
+
+  <!-- Spell Check
+
+       The spell check component can return a list of alternative spelling
+       suggestions.  
+
+       http://wiki.apache.org/solr/SpellCheckComponent
+    -->
+  <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
+
+    <str name="queryAnalyzerFieldType">text_general</str>
+
+    <!-- Multiple "Spell Checkers" can be declared and used by this
+         component
+      -->
+
+    <!-- a spellchecker built from a field of the main index -->
+    <lst name="spellchecker">
+      <str name="name">default</str>
+      <str name="field">text</str>
+      <str name="classname">solr.DirectSolrSpellChecker</str>
+      <!-- the spellcheck distance measure used, the default is the internal levenshtein -->
+      <str name="distanceMeasure">internal</str>
+      <!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
+      <float name="accuracy">0.5</float>
+      <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
+      <int name="maxEdits">2</int>
+      <!-- the minimum shared prefix when enumerating terms -->
+      <int name="minPrefix">1</int>
+      <!-- maximum number of inspections per result. -->
+      <int name="maxInspections">5</int>
+      <!-- minimum length of a query term to be considered for correction -->
+      <int name="minQueryLength">4</int>
+      <!-- maximum threshold of documents a query term can appear in to be considered for correction -->
+      <float name="maxQueryFrequency">0.01</float>
+      <!-- uncomment this to require suggestions to occur in 1% of the documents
+       <float name="thresholdTokenFrequency">.01</float>
+      -->
+    </lst>
+
+    <!-- a spellchecker that can break or combine words.  See "/spell" handler below for usage -->
+    <lst name="spellchecker">
+      <str name="name">wordbreak</str>
+      <str name="classname">solr.WordBreakSolrSpellChecker</str>
+      <str name="field">name</str>
+      <str name="combineWords">true</str>
+      <str name="breakWords">true</str>
+      <int name="maxChanges">10</int>
+    </lst>
+
+    <!-- a spellchecker that uses a different distance measure -->
+    <!--
+       <lst name="spellchecker">
+         <str name="name">jarowinkler</str>
+         <str name="field">spell</str>
+         <str name="classname">solr.DirectSolrSpellChecker</str>
+         <str name="distanceMeasure">
+           org.apache.lucene.search.spell.JaroWinklerDistance
+         </str>
+       </lst>
+     -->
+
+    <!-- a spellchecker that uses an alternate comparator
+
+         comparatorClass can be one of:
+          1. score (default)
+          2. freq (Frequency first, then score)
+          3. A fully qualified class name
+      -->
+    <!--
+       <lst name="spellchecker">
+         <str name="name">freq</str>
+         <str name="field">lowerfilt</str>
+         <str name="classname">solr.DirectSolrSpellChecker</str>
+         <str name="comparatorClass">freq</str>
+      -->
+
+    <!-- A spellchecker that reads the list of words from a file -->
+    <!--
+       <lst name="spellchecker">
+         <str name="classname">solr.FileBasedSpellChecker</str>
+         <str name="name">file</str>
+         <str name="sourceLocation">spellings.txt</str>
+         <str name="characterEncoding">UTF-8</str>
+         <str name="spellcheckIndexDir">spellcheckerFile</str>
+       </lst>
+      -->
+  </searchComponent>
+
+  <!-- A request handler for demonstrating the spellcheck component.  
+
+       NOTE: This is purely as an example.  The whole purpose of the
+       SpellCheckComponent is to hook it into the request handler that
+       handles your normal user queries so that a separate request is
+       not needed to get suggestions.
+
+       IN OTHER WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS
+       NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM!
+       
+       See http://wiki.apache.org/solr/SpellCheckComponent for details
+       on the request parameters.
+    -->
+  <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
+    <lst name="defaults">
+      <str name="df">text</str>
+      <!-- Solr will use suggestions from both the 'default' spellchecker
+           and from the 'wordbreak' spellchecker and combine them.
+           collations (re-written queries) can include a combination of
+           corrections from both spellcheckers -->
+      <str name="spellcheck.dictionary">default</str>
+      <str name="spellcheck.dictionary">wordbreak</str>
+      <str name="spellcheck">on</str>
+      <str name="spellcheck.extendedResults">true</str>
+      <str name="spellcheck.count">10</str>
+      <str name="spellcheck.alternativeTermCount">5</str>
+      <str name="spellcheck.maxResultsForSuggest">5</str>
+      <str name="spellcheck.collate">true</str>
+      <str name="spellcheck.collateExtendedResults">true</str>
+      <str name="spellcheck.maxCollationTries">10</str>
+      <str name="spellcheck.maxCollations">5</str>
+    </lst>
+    <arr name="last-components">
+      <str>spellcheck</str>
+    </arr>
+  </requestHandler>
+
+  <!-- Term Vector Component
+
+       http://wiki.apache.org/solr/TermVectorComponent
+    -->
+  <searchComponent name="tvComponent" class="solr.TermVectorComponent"/>
+
+  <!-- A request handler for demonstrating the term vector component
+
+       This is purely as an example.
+
+       In reality you will likely want to add the component to your 
+       already specified request handlers. 
+    -->
+  <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy">
+    <lst name="defaults">
+      <str name="df">text</str>
+      <bool name="tv">true</bool>
+    </lst>
+    <arr name="last-components">
+      <str>tvComponent</str>
+    </arr>
+  </requestHandler>
+
+  <!-- Clustering Component
+
+       http://wiki.apache.org/solr/ClusteringComponent
+
+       You'll need to set the solr.clustering.enabled system property
+       when running solr to run with clustering enabled:
+
+            java -Dsolr.clustering.enabled=true -jar start.jar
+
+    -->
+  <searchComponent name="clustering"
+                   enable="${solr.clustering.enabled:false}"
+                   class="solr.clustering.ClusteringComponent" >
+    <!-- Declare an engine -->
+    <lst name="engine">
+      <!-- The name, only one can be named "default" -->
+      <str name="name">default</str>
+
+      <!-- Class name of Carrot2 clustering algorithm.
+
+           Currently available algorithms are:
+           
+           * org.carrot2.clustering.lingo.LingoClusteringAlgorithm
+           * org.carrot2.clustering.stc.STCClusteringAlgorithm
+           * org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm
+           
+           See http://project.carrot2.org/algorithms.html for the
+           algorithm's characteristics.
+        -->
+      <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
+
+      <!-- Overriding values for Carrot2 default algorithm attributes.
+
+           For a description of all available attributes, see:
+           http://download.carrot2.org/stable/manual/#chapter.components.
+           Use attribute key as name attribute of str elements
+           below. These can be further overridden for individual
+           requests by specifying attribute key as request parameter
+           name and attribute value as parameter value.
+        -->
+      <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
+
+      <!-- Location of Carrot2 lexical resources.
+
+           A directory from which to load Carrot2-specific stop words
+           and stop labels. Absolute or relative to Solr config directory.
+           If a specific resource (e.g. stopwords.en) is present in the
+           specified dir, it will completely override the corresponding
+           default one that ships with Carrot2.
+
+           For an overview of Carrot2 lexical resources, see:
+           http://download.carrot2.org/head/manual/#chapter.lexical-resources
+        -->
+      <str name="carrot.lexicalResourcesDir">clustering/carrot2</str>
+
+      <!-- The language to assume for the documents.
+
+           For a list of allowed values, see:
+           http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
+       -->
+      <str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
+    </lst>
+    <lst name="engine">
+      <str name="name">stc</str>
+      <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
+    </lst>
+  </searchComponent>
+
+  <!-- A request handler for demonstrating the clustering component
+
+       This is purely as an example.
+
+       In reality you will likely want to add the component to your 
+       already specified request handlers. 
+    -->
+  <requestHandler name="/clustering"
+                  startup="lazy"
+                  enable="${solr.clustering.enabled:false}"
+                  class="solr.SearchHandler">
+    <lst name="defaults">
+      <bool name="clustering">true</bool>
+      <str name="clustering.engine">default</str>
+      <bool name="clustering.results">true</bool>
+      <!-- The title field -->
+      <str name="carrot.title">name</str>
+      <str name="carrot.url">id</str>
+      <!-- The field to cluster on -->
+      <str name="carrot.snippet">features</str>
+      <!-- produce summaries -->
+      <bool name="carrot.produceSummary">true</bool>
+      <!-- the maximum number of labels per cluster -->
+      <!--<int name="carrot.numDescriptions">5</int>-->
+      <!-- produce sub clusters -->
+      <bool name="carrot.outputSubClusters">false</bool>
+
+      <str name="defType">edismax</str>
+      <str name="qf">
+        text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+      </str>
+      <str name="q.alt">*:*</str>
+      <str name="rows">10</str>
+      <str name="fl">*,score</str>
+    </lst>
+    <arr name="last-components">
+      <str>clustering</str>
+    </arr>
+  </requestHandler>
+
+  <!-- Terms Component
+
+       http://wiki.apache.org/solr/TermsComponent
+
+       A component to return terms and document frequency of those
+       terms
+    -->
+  <searchComponent name="terms" class="solr.TermsComponent"/>
+
+  <!-- A request handler for demonstrating the terms component -->
+  <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
+    <lst name="defaults">
+      <bool name="terms">true</bool>
+      <bool name="distrib">false</bool>
+    </lst>
+    <arr name="components">
+      <str>terms</str>
+    </arr>
+  </requestHandler>
+
+
+  <!-- Query Elevation Component
+
+       http://wiki.apache.org/solr/QueryElevationComponent
+
+       a search component that enables you to configure the top
+       results for a given query regardless of the normal lucene
+       scoring.
+    -->
+  <searchComponent name="elevator" class="solr.QueryElevationComponent" >
+    <!-- pick a fieldType to analyze queries -->
+    <str name="queryFieldType">string</str>
+    <str name="config-file">elevate.xml</str>
+  </searchComponent>
+
+  <!-- A request handler for demonstrating the elevator component -->
+  <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
+    <lst name="defaults">
+      <str name="echoParams">explicit</str>
+      <str name="df">text</str>
+    </lst>
+    <arr name="last-components">
+      <str>elevator</str>
+    </arr>
+  </requestHandler>
+
+  <!-- Highlighting Component
+
+       http://wiki.apache.org/solr/HighlightingParameters
+    -->
+  <searchComponent class="solr.HighlightComponent" name="highlight">
+    <highlighting>
+      <!-- Configure the standard fragmenter -->
+      <!-- This could most likely be commented out in the "default" case -->
+      <fragmenter name="gap"
+                  default="true"
+                  class="solr.highlight.GapFragmenter">
+        <lst name="defaults">
+          <int name="hl.fragsize">100</int>
+        </lst>
+      </fragmenter>
+
+      <!-- A regular-expression-based fragmenter 
+           (for sentence extraction) 
+        -->
+      <fragmenter name="regex"
+                  class="solr.highlight.RegexFragmenter">
+        <lst name="defaults">
+          <!-- slightly smaller fragsizes work better because of slop -->
+          <int name="hl.fragsize">70</int>
+          <!-- allow 50% slop on fragment sizes -->
+          <float name="hl.regex.slop">0.5</float>
+          <!-- a basic sentence pattern -->
+          <str name="hl.regex.pattern">[-\w ,/\n\&quot;&apos;]{20,200}</str>
+        </lst>
+      </fragmenter>
+
+      <!-- Configure the standard formatter -->
+      <formatter name="html"
+                 default="true"
+                 class="solr.highlight.HtmlFormatter">
+        <lst name="defaults">
+          <str name="hl.simple.pre"><![CDATA[<em>]]></str>
+          <str name="hl.simple.post"><![CDATA[</em>]]></str>
+        </lst>
+      </formatter>
+
+      <!-- Configure the standard encoder -->
+      <encoder name="html"
+               class="solr.highlight.HtmlEncoder" />
+
+      <!-- Configure the standard fragListBuilder -->
+      <fragListBuilder name="simple"
+                       class="solr.highlight.SimpleFragListBuilder"/>
+
+      <!-- Configure the single fragListBuilder -->
+      <fragListBuilder name="single"
+                       class="solr.highlight.SingleFragListBuilder"/>
+
+      <!-- Configure the weighted fragListBuilder -->
+      <fragListBuilder name="weighted"
+                       default="true"
+                       class="solr.highlight.WeightedFragListBuilder"/>
+
+      <!-- default tag FragmentsBuilder -->
+      <fragmentsBuilder name="default"
+                        default="true"
+                        class="solr.highlight.ScoreOrderFragmentsBuilder">
+        <!-- 
+        <lst name="defaults">
+          <str name="hl.multiValuedSeparatorChar">/</str>
+        </lst>
+        -->
+      </fragmentsBuilder>
+
+      <!-- multi-colored tag FragmentsBuilder -->
+      <fragmentsBuilder name="colored"
+                        class="solr.highlight.ScoreOrderFragmentsBuilder">
+        <lst name="defaults">
+          <str name="hl.tag.pre"><![CDATA[
+               <b style="background:yellow">,<b style="background:lawngreen">,
+               <b style="background:aquamarine">,<b style="background:magenta">,
+               <b style="background:palegreen">,<b style="background:coral">,
+               <b style="background:wheat">,<b style="background:khaki">,
+               <b style="background:lime">,<b style="background:deepskyblue">]]></str>
+          <str name="hl.tag.post"><![CDATA[</b>]]></str>
+        </lst>
+      </fragmentsBuilder>
+
+      <boundaryScanner name="default"
+                       default="true"
+                       class="solr.highlight.SimpleBoundaryScanner">
+        <lst name="defaults">
+          <str name="hl.bs.maxScan">10</str>
+          <str name="hl.bs.chars">.,!? &#9;&#10;&#13;</str>
+        </lst>
+      </boundaryScanner>
+
+      <boundaryScanner name="breakIterator"
+                       class="solr.highlight.BreakIteratorBoundaryScanner">
+        <lst name="defaults">
+          <!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE -->
+          <str name="hl.bs.type">WORD</str>
+          <!-- language and country are used when constructing Locale object.  -->
+          <!-- And the Locale object will be used when getting instance of BreakIterator -->
+          <str name="hl.bs.language">en</str>
+          <str name="hl.bs.country">US</str>
+        </lst>
+      </boundaryScanner>
+    </highlighting>
+  </searchComponent>
+
+  <!-- Update Processors
+
+       Chains of Update Processor Factories for dealing with Update
+       Requests can be declared, and then used by name in Update
+       Request Processors
+
+       http://wiki.apache.org/solr/UpdateRequestProcessor
+
+    -->
+  
+  <!-- Add unknown fields to the schema 
+  
+       An example field type guessing update processor that will
+       attempt to parse string-typed field values as Booleans, Longs,
+       Doubles, or Dates, and then add schema fields with the guessed
+       field types.  
+       
+       This requires that the schema is both managed and mutable, by
+       declaring schemaFactory as ManagedIndexSchemaFactory, with
+       mutable specified as true. 
+       
+       See http://wiki.apache.org/solr/GuessingFieldTypes
+    -->
+  <updateRequestProcessorChain name="add-unknown-fields-to-the-schema">
+    <processor class="solr.RemoveBlankFieldUpdateProcessorFactory"/>
+    <processor class="solr.ParseBooleanFieldUpdateProcessorFactory"/>
+    <processor class="solr.ParseLongFieldUpdateProcessorFactory"/>
+    <processor class="solr.ParseDoubleFieldUpdateProcessorFactory"/>
+    <processor class="solr.ParseDateFieldUpdateProcessorFactory">
+      <arr name="format">
+        <str>yyyy-MM-dd'T'HH:mm:ss.SSSZ</str>
+        <str>yyyy-MM-dd'T'HH:mm:ss,SSSZ</str>
+        <str>yyyy-MM-dd'T'HH:mm:ss.SSS</str>
+        <str>yyyy-MM-dd'T'HH:mm:ss,SSS</str>
+        <str>yyyy-MM-dd'T'HH:mm:ssZ</str>
+        <str>yyyy-MM-dd'T'HH:mm:ss</str>
+        <str>yyyy-MM-dd'T'HH:mmZ</str>
+        <str>yyyy-MM-dd'T'HH:mm</str>
+        <str>yyyy-MM-dd HH:mm:ss.SSSZ</str>
+        <str>yyyy-MM-dd HH:mm:ss,SSSZ</str>
+        <str>yyyy-MM-dd HH:mm:ss.SSS</str>
+        <str>yyyy-MM-dd HH:mm:ss,SSS</str>
+        <str>yyyy-MM-dd HH:mm:ssZ</str>
+        <str>yyyy-MM-dd HH:mm:ss</str>
+        <str>yyyy-MM-dd HH:mmZ</str>
+        <str>yyyy-MM-dd HH:mm</str>
+        <str>yyyy-MM-dd</str>
+      </arr>
+    </processor>
+    <processor class="solr.AddSchemaFieldsUpdateProcessorFactory">
+      <str name="defaultFieldType">text_general</str>
+      <lst name="typeMapping">
+        <str name="valueClass">java.lang.Boolean</str>
+        <str name="fieldType">booleans</str>
+      </lst>
+      <lst name="typeMapping">
+        <str name="valueClass">java.util.Date</str>
+        <str name="fieldType">tdates</str>
+      </lst>
+      <lst name="typeMapping">
+        <str name="valueClass">java.lang.Long</str>
+        <str name="valueClass">java.lang.Integer</str>
+        <str name="fieldType">tlongs</str>
+      </lst>
+      <lst name="typeMapping">
+        <str name="valueClass">java.lang.Number</str>
+        <str name="fieldType">tdoubles</str>
+      </lst>
+    </processor>
+    <processor class="solr.LogUpdateProcessorFactory"/>
+    <processor class="solr.RunUpdateProcessorFactory"/>
+  </updateRequestProcessorChain>
+
+  <!-- Deduplication
+
+       An example dedup update processor that creates the "id" field
+       on the fly based on the hash code of some other fields.  This
+       example has overwriteDupes set to false since we are using the
+       id field as the signatureField and Solr will maintain
+       uniqueness based on that anyway.  
+       
+    -->
+  <!--
+     <updateRequestProcessorChain name="dedupe">
+       <processor class="solr.processor.SignatureUpdateProcessorFactory">
+         <bool name="enabled">true</bool>
+         <str name="signatureField">id</str>
+         <bool name="overwriteDupes">false</bool>
+         <str name="fields">name,features,cat</str>
+         <str name="signatureClass">solr.processor.Lookup3Signature</str>
+       </processor>
+       <processor class="solr.LogUpdateProcessorFactory" />
+       <processor class="solr.RunUpdateProcessorFactory" />
+     </updateRequestProcessorChain>
+    -->
+
+  <!-- Language identification
+
+       This example update chain identifies the language of the incoming
+       documents using the langid contrib. The detected language is
+       written to field language_s. No field name mapping is done.
+       The fields used for detection are text, title, subject and description,
+       making this example suitable for detecting languages from full-text
+       rich documents injected via ExtractingRequestHandler.
+       See more about langId at http://wiki.apache.org/solr/LanguageDetection
+    -->
+  <!--
+   <updateRequestProcessorChain name="langid">
+     <processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory">
+       <str name="langid.fl">text,title,subject,description</str>
+       <str name="langid.langField">language_s</str>
+       <str name="langid.fallback">en</str>
+     </processor>
+     <processor class="solr.LogUpdateProcessorFactory" />
+     <processor class="solr.RunUpdateProcessorFactory" />
+   </updateRequestProcessorChain>
+  -->
+
+  <!-- Script update processor
+
+    This example hooks in an update processor implemented using JavaScript.
+
+    See more about the script update processor at http://wiki.apache.org/solr/ScriptUpdateProcessor
+  -->
+  <!--
+    <updateRequestProcessorChain name="script">
+      <processor class="solr.StatelessScriptUpdateProcessorFactory">
+        <str name="script">update-script.js</str>
+        <lst name="params">
+          <str name="config_param">example config parameter</str>
+        </lst>
+      </processor>
+      <processor class="solr.RunUpdateProcessorFactory" />
+    </updateRequestProcessorChain>
+  -->
+
+  <!-- Response Writers
+
+       http://wiki.apache.org/solr/QueryResponseWriter
+
+       Request responses will be written using the writer specified by
+       the 'wt' request parameter matching the name of a registered
+       writer.
+
+       The writer declared with default="true" is used when 'wt' is
+       not specified in the request.
+    -->
+  <!-- The following response writers are implicitly configured unless
+       overridden...
+    -->
+  <!--
+     <queryResponseWriter name="xml" 
+                          default="true"
+                          class="solr.XMLResponseWriter" />
+     <queryResponseWriter name="json" class="solr.JSONResponseWriter"/>
+     <queryResponseWriter name="python" class="solr.PythonResponseWriter"/>
+     <queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/>
+     <queryResponseWriter name="php" class="solr.PHPResponseWriter"/>
+     <queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/>
+     <queryResponseWriter name="csv" class="solr.CSVResponseWriter"/>
+     <queryResponseWriter name="schema.xml" class="solr.SchemaXmlResponseWriter"/>
+    -->
+
+  <queryResponseWriter name="json" class="solr.JSONResponseWriter">
+    <!-- For the purposes of the tutorial, JSON responses are written as
+     plain text so that they are easy to read in *any* browser.
+     If you expect a MIME type of "application/json" just remove this override.
+    -->
+    <str name="content-type">text/plain; charset=UTF-8</str>
+  </queryResponseWriter>
+
+  <!--
+     Custom response writers can be declared as needed...
+    -->
+  <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"/>
+
+
+  <!-- XSLT response writer transforms the XML output by any xslt file found
+       in Solr's conf/xslt directory.  Changes to xslt files are checked for
+       every xsltCacheLifetimeSeconds.  
+    -->
+  <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
+    <int name="xsltCacheLifetimeSeconds">5</int>
+  </queryResponseWriter>
+
+  <!-- Query Parsers
+
+       http://wiki.apache.org/solr/SolrQuerySyntax
+
+       Multiple QParserPlugins can be registered by name, and then
+       used in either the "defType" param for the QueryComponent (used
+       by SearchHandler) or in LocalParams
+    -->
+  <!-- example of registering a query parser -->
+  <!--
+     <queryParser name="myparser" class="com.mycompany.MyQParserPlugin"/>
+    -->
+
+  <!-- Function Parsers
+
+       http://wiki.apache.org/solr/FunctionQuery
+
+       Multiple ValueSourceParsers can be registered by name, and then
+       used as function names when using the "func" QParser.
+    -->
+  <!-- example of registering a custom function parser  -->
+  <!--
+     <valueSourceParser name="myfunc" 
+                        class="com.mycompany.MyValueSourceParser" />
+    -->
+
+
+  <!-- Document Transformers
+       http://wiki.apache.org/solr/DocTransformers
+    -->
+  <!--
+     Could be something like:
+     <transformer name="db" class="com.mycompany.LoadFromDatabaseTransformer" >
+       <str name="connection">jdbc://....</str>
+     </transformer>
+     
+     To add a constant value to all docs, use:
+     <transformer name="mytrans2" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
+       <int name="value">5</int>
+     </transformer>
+     
+     If you want the user to still be able to change it with _value:something_ use this:
+     <transformer name="mytrans3" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
+       <double name="defaultValue">5</double>
+     </transformer>
+
+      If you are using the QueryElevationComponent, you may wish to mark documents that get boosted.  The
+      EditorialMarkerFactory will do exactly that:
+     <transformer name="qecBooster" class="org.apache.solr.response.transform.EditorialMarkerFactory" />
+    -->
+
+
+  <!-- Legacy config for the admin interface -->
+  <admin>
+    <defaultQuery>*:*</defaultQuery>
+  </admin>
+
+</config>
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/stopwords.txt b/zookeeper/example-schemaless/solr/collection1/conf/stopwords.txt

new file mode 100644 (file)

index 0000000..ae1e83e
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/stopwords.txt
@@ -0,0 +1,14 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/zookeeper/example-schemaless/solr/collection1/conf/synonyms.txt b/zookeeper/example-schemaless/solr/collection1/conf/synonyms.txt

new file mode 100644 (file)

index 0000000..7f72128
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/conf/synonyms.txt
@@ -0,0 +1,29 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+#some test synonym mappings unlikely to appear in real input text
+aaafoo => aaabar
+bbbfoo => bbbfoo bbbbar
+cccfoo => cccbar cccbaz
+fooaaa,baraaa,bazaaa
+
+# Some synonym groups specific to this example
+GB,gib,gigabyte,gigabytes
+MB,mib,megabyte,megabytes
+Television, Televisions, TV, TVs
+#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
+#after us won't split it into two words.
+
+# Synonym mappings can be used for spelling correction too
+pixima => pixma
+
diff --git a/zookeeper/example-schemaless/solr/collection1/core.properties b/zookeeper/example-schemaless/solr/collection1/core.properties

new file mode 100644 (file)

index 0000000..bc0cf7d
--- /dev/null
+++ b/zookeeper/example-schemaless/solr/collection1/core.properties
@@ -0,0 +1 @@
+name=collection1
\ No newline at end of file
diff --git a/zookeeper/exampledocs/books.csv b/zookeeper/exampledocs/books.csv

new file mode 100644 (file)

index 0000000..8ccecbb
--- /dev/null
+++ b/zookeeper/exampledocs/books.csv
@@ -0,0 +1,11 @@
+id,cat,name,price,inStock,author,series_t,sequence_i,genre_s
+0553573403,book,A Game of Thrones,7.99,true,George R.R. Martin,"A Song of Ice and Fire",1,fantasy
+0553579908,book,A Clash of Kings,7.99,true,George R.R. Martin,"A Song of Ice and Fire",2,fantasy
+055357342X,book,A Storm of Swords,7.99,true,George R.R. Martin,"A Song of Ice and Fire",3,fantasy
+0553293354,book,Foundation,7.99,true,Isaac Asimov,Foundation Novels,1,scifi
+0812521390,book,The Black Company,6.99,false,Glen Cook,The Chronicles of The Black Company,1,fantasy
+0812550706,book,Ender's Game,6.99,true,Orson Scott Card,Ender,1,scifi
+0441385532,book,Jhereg,7.95,false,Steven Brust,Vlad Taltos,1,fantasy
+0380014300,book,Nine Princes In Amber,6.99,true,Roger Zelazny,the Chronicles of Amber,1,fantasy
+0805080481,book,The Book of Three,5.99,true,Lloyd Alexander,The Chronicles of Prydain,1,fantasy
+080508049X,book,The Black Cauldron,5.99,true,Lloyd Alexander,The Chronicles of Prydain,2,fantasy
diff --git a/zookeeper/exampledocs/books.json b/zookeeper/exampledocs/books.json

new file mode 100644 (file)

index 0000000..f82d510
--- /dev/null
+++ b/zookeeper/exampledocs/books.json
@@ -0,0 +1,51 @@
+[
+  {
+    "id" : "978-0641723445",
+    "cat" : ["book","hardcover"],
+    "name" : "The Lightning Thief",
+    "author" : "Rick Riordan",
+    "series_t" : "Percy Jackson and the Olympians",
+    "sequence_i" : 1,
+    "genre_s" : "fantasy",
+    "inStock" : true,
+    "price" : 12.50,
+    "pages_i" : 384
+  }
+,
+  {
+    "id" : "978-1423103349",
+    "cat" : ["book","paperback"],
+    "name" : "The Sea of Monsters",
+    "author" : "Rick Riordan",
+    "series_t" : "Percy Jackson and the Olympians",
+    "sequence_i" : 2,
+    "genre_s" : "fantasy",
+    "inStock" : true,
+    "price" : 6.49,
+    "pages_i" : 304
+  }
+,
+  {
+    "id" : "978-1857995879",
+    "cat" : ["book","paperback"],
+    "name" : "Sophie's World : The Greek Philosophers",
+    "author" : "Jostein Gaarder",
+    "sequence_i" : 1,
+    "genre_s" : "fantasy",
+    "inStock" : true,
+    "price" : 3.07,
+    "pages_i" : 64
+  }
+,
+  {
+    "id" : "978-1933988177",
+    "cat" : ["book","paperback"],
+    "name" : "Lucene in Action, Second Edition",
+    "author" : "Michael McCandless",
+    "sequence_i" : 1,
+    "genre_s" : "IT",
+    "inStock" : true,
+    "price" : 30.50,
+    "pages_i" : 475
+  }
+]
diff --git a/zookeeper/exampledocs/gb18030-example.xml b/zookeeper/exampledocs/gb18030-example.xml

new file mode 100644 (file)

index 0000000..769be19
--- /dev/null
+++ b/zookeeper/exampledocs/gb18030-example.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="GB18030"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<add>
+  <doc>
+    <field name="id">GB18030TEST</field>
+    <field name="name">Test with some GB18030 encoded characters</field>
+    <field name="features">No accents here</field>
+    <field name="features">这是一个功能</field>
+    <field name="features">This is a feature (translated)</field>
+    <field name="features">这份文件是很有光泽</field>
+    <field name="features">This document is very shiny (translated)</field>
+    <field name="price">0</field>
+    <field name="inStock">true</field>
+  </doc>
+</add>
+
diff --git a/zookeeper/exampledocs/hd.xml b/zookeeper/exampledocs/hd.xml

new file mode 100644 (file)

index 0000000..3c5448d
--- /dev/null
+++ b/zookeeper/exampledocs/hd.xml
@@ -0,0 +1,56 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<add>
+<doc>
+  <field name="id">SP2514N</field>
+  <field name="name">Samsung SpinPoint P120 SP2514N - hard drive - 250 GB - ATA-133</field>
+  <field name="manu">Samsung Electronics Co. Ltd.</field>
+  <!-- Join -->
+  <field name="manu_id_s">samsung</field>
+  <field name="cat">electronics</field>
+  <field name="cat">hard drive</field>
+  <field name="features">7200RPM, 8MB cache, IDE Ultra ATA-133</field>
+  <field name="features">NoiseGuard, SilentSeek technology, Fluid Dynamic Bearing (FDB) motor</field>
+  <field name="price">92</field>
+  <field name="popularity">6</field>
+  <field name="inStock">true</field>
+  <field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
+  <!-- Near Oklahoma City -->
+  <field name="store">35.0752,-97.032</field>
+</doc>
+
+<doc>
+  <field name="id">6H500F0</field>
+  <field name="name">Maxtor DiamondMax 11 - hard drive - 500 GB - SATA-300</field>
+  <field name="manu">Maxtor Corp.</field>
+  <!-- Join -->
+  <field name="manu_id_s">maxtor</field>
+  <field name="cat">electronics</field>
+  <field name="cat">hard drive</field>
+  <field name="features">SATA 3.0Gb/s, NCQ</field>
+  <field name="features">8.5ms seek</field>
+  <field name="features">16MB cache</field>
+  <field name="price">350</field>
+  <field name="popularity">6</field>
+  <field name="inStock">true</field>
+  <!-- Buffalo store -->
+  <field name="store">45.17614,-93.87341</field>
+  <field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
+</doc>
+</add>
+
diff --git a/zookeeper/exampledocs/ipod_other.xml b/zookeeper/exampledocs/ipod_other.xml

new file mode 100644 (file)

index 0000000..7756c9f
--- /dev/null
+++ b/zookeeper/exampledocs/ipod_other.xml
@@ -0,0 +1,60 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<add>
+
+<doc>
+  <field name="id">F8V7067-APL-KIT</field>
+  <field name="name">Belkin Mobile Power Cord for iPod w/ Dock</field>
+  <field name="manu">Belkin</field>
+  <!-- Join -->
+  <field name="manu_id_s">belkin</field>
+  <field name="cat">electronics</field>
+  <field name="cat">connector</field>
+  <field name="features">car power adapter, white</field>
+  <field name="weight">4</field>
+  <field name="price">19.95</field>
+  <field name="popularity">1</field>
+  <field name="inStock">false</field>
+  <!-- Buffalo store -->
+  <field name="store">45.18014,-93.87741</field>
+  <field name="manufacturedate_dt">2005-08-01T16:30:25Z</field>
+</doc>
+
+<doc>
+  <field name="id">IW-02</field>
+  <field name="name">iPod &amp; iPod Mini USB 2.0 Cable</field>
+  <field name="manu">Belkin</field>
+  <!-- Join -->
+  <field name="manu_id_s">belkin</field>
+  <field name="cat">electronics</field>
+  <field name="cat">connector</field>
+  <field name="features">car power adapter for iPod, white</field>
+  <field name="weight">2</field>
+  <field name="price">11.50</field>
+  <field name="popularity">1</field>
+  <field name="inStock">false</field>
+  <!-- San Francisco store -->
+  <field name="store">37.7752,-122.4232</field>
+  <field name="manufacturedate_dt">2006-02-14T23:55:59Z</field>
+</doc>
+
+
+</add>
+
+
+
diff --git a/zookeeper/exampledocs/ipod_video.xml b/zookeeper/exampledocs/ipod_video.xml

new file mode 100644 (file)

index 0000000..1ca5f6f
--- /dev/null
+++ b/zookeeper/exampledocs/ipod_video.xml
@@ -0,0 +1,40 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<add><doc>
+  <field name="id">MA147LL/A</field>
+  <field name="name">Apple 60 GB iPod with Video Playback Black</field>
+  <field name="manu">Apple Computer Inc.</field>
+  <!-- Join -->
+  <field name="manu_id_s">apple</field>
+  <field name="cat">electronics</field>
+  <field name="cat">music</field>
+  <field name="features">iTunes, Podcasts, Audiobooks</field>
+  <field name="features">Stores up to 15,000 songs, 25,000 photos, or 150 hours of video</field>
+  <field name="features">2.5-inch, 320x240 color TFT LCD display with LED backlight</field>
+  <field name="features">Up to 20 hours of battery life</field>
+  <field name="features">Plays AAC, MP3, WAV, AIFF, Audible, Apple Lossless, H.264 video</field>
+  <field name="features">Notes, Calendar, Phone book, Hold button, Date display, Photo wallet, Built-in games, JPEG photo playback, Upgradeable firmware, USB 2.0 compatibility, Playback speed control, Rechargeable capability, Battery level indication</field>
+  <field name="includes">earbud headphones, USB cable</field>
+  <field name="weight">5.5</field>
+  <field name="price">399.00</field>
+  <field name="popularity">10</field>
+  <field name="inStock">true</field>
+  <!-- Dodge City store -->
+  <field name="store">37.7752,-100.0232</field>
+  <field name="manufacturedate_dt">2005-10-12T08:00:00Z</field>
+</doc></add>
diff --git a/zookeeper/exampledocs/manufacturers.xml b/zookeeper/exampledocs/manufacturers.xml

new file mode 100644 (file)

index 0000000..e3121d5
--- /dev/null
+++ b/zookeeper/exampledocs/manufacturers.xml
@@ -0,0 +1,75 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<add>
+  <doc>
+    <field name="id">adata</field>
+    <field name="compName_s">A-Data Technology</field>
+    <field name="address_s">46221 Landing Parkway Fremont, CA 94538</field>
+  </doc>
+  <doc>
+    <field name="id">apple</field>
+    <field name="compName_s">Apple</field>
+    <field name="address_s">1 Infinite Way, Cupertino CA</field>
+  </doc>
+  <doc>
+    <field name="id">asus</field>
+    <field name="compName_s">ASUS Computer</field>
+    <field name="address_s">800 Corporate Way Fremont, CA 94539</field>
+  </doc>
+  <doc>
+    <field name="id">ati</field>
+    <field name="compName_s">ATI Technologies</field>
+    <field name="address_s">33 Commerce Valley Drive East Thornhill, ON L3T 7N6 Canada</field>
+  </doc>
+  <doc>
+    <field name="id">belkin</field>
+    <field name="compName_s">Belkin</field>
+    <field name="address_s">12045 E. Waterfront Drive Playa Vista, CA 90094</field>
+  </doc>
+  <doc>
+    <field name="id">canon</field>
+    <field name="compName_s">Canon, Inc.</field>
+    <field name="address_s">One Canon Plaza Lake Success, NY 11042</field>
+  </doc>
+  <doc>
+    <field name="id">corsair</field>
+    <field name="compName_s">Corsair Microsystems</field>
+    <field name="address_s">46221 Landing Parkway Fremont, CA 94538</field>
+  </doc>
+  <doc>
+    <field name="id">dell</field>
+    <field name="compName_s">Dell, Inc.</field>
+    <field name="address_s">One Dell Way Round Rock, Texas 78682</field>
+  </doc>
+  <doc>
+    <field name="id">maxtor</field>
+    <field name="compName_s">Maxtor Corporation</field>
+    <field name="address_s">920 Disc Drive Scotts Valley, CA 95066</field>
+  </doc>
+  <doc>
+    <field name="id">samsung</field>
+    <field name="compName_s">Samsung Electronics Co. Ltd.</field>
+    <field name="address_s">105 Challenger Rd. Ridgefield Park, NJ 07660-0511</field>
+  </doc>
+  <doc>
+    <field name="id">viewsonic</field>
+    <field name="compName_s">ViewSonic Corp</field>
+    <field name="address_s">381 Brea Canyon Road Walnut, CA 91789-0708</field>
+  </doc>
+</add>
+
diff --git a/zookeeper/exampledocs/mem.xml b/zookeeper/exampledocs/mem.xml

new file mode 100644 (file)

index 0000000..0b89d67
--- /dev/null
+++ b/zookeeper/exampledocs/mem.xml
@@ -0,0 +1,77 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<add>
+<doc>
+  <field name="id">TWINX2048-3200PRO</field>
+  <field name="name">CORSAIR  XMS 2GB (2 x 1GB) 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) Dual Channel Kit System Memory - Retail</field>
+  <field name="manu">Corsair Microsystems Inc.</field>
+  <!-- Join -->
+  <field name="manu_id_s">corsair</field>
+  <field name="cat">electronics</field>
+  <field name="cat">memory</field>
+  <field name="features">CAS latency 2,        2-3-3-6 timing, 2.75v, unbuffered, heat-spreader</field>
+  <field name="price">185</field>
+  <field name="popularity">5</field>
+  <field name="inStock">true</field>
+  <!-- San Francisco store -->
+  <field name="store">37.7752,-122.4232</field>
+  <field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
+
+  <!-- a field for testing payload tagged text via DelimitedPayloadTokenFilter -->
+  <field name="payloads">electronics|6.0 memory|3.0</field>
+</doc>
+
+<doc>
+  <field name="id">VS1GB400C3</field>
+  <field name="name">CORSAIR ValueSelect 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) System Memory - Retail</field>
+  <field name="manu">Corsair Microsystems Inc.</field>
+  <!-- Join -->
+  <field name="manu_id_s">corsair</field>
+  <field name="cat">electronics</field>
+  <field name="cat">memory</field>
+  <field name="price">74.99</field>
+  <field name="popularity">7</field>
+  <field name="inStock">true</field>
+  <!-- Dodge City store -->
+  <field name="store">37.7752,-100.0232</field>
+  <field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
+
+  <field name="payloads">electronics|4.0 memory|2.0</field>
+</doc>
+
+<doc>
+  <field name="id">VDBDB1A16</field>
+  <field name="name">A-DATA V-Series 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) System Memory - OEM</field>
+  <field name="manu">A-DATA Technology Inc.</field>
+  <!-- Join -->
+  <field name="manu_id_s">corsair</field>
+  <field name="cat">electronics</field>
+  <field name="cat">memory</field>
+  <field name="features">CAS latency 3,         2.7v</field>
+  <!-- note: price is missing on this one, and popularity is 0 -->
+  <field name="popularity">0</field>
+  <field name="inStock">true</field>
+  <!-- Buffalo store -->
+  <field name="store">45.18414,-93.88141</field>
+  <field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
+
+  <field name="payloads">electronics|0.9 memory|0.1</field>
+</doc>
+
+</add>
+
diff --git a/zookeeper/exampledocs/money.xml b/zookeeper/exampledocs/money.xml

new file mode 100644 (file)

index 0000000..b1b8036
--- /dev/null
+++ b/zookeeper/exampledocs/money.xml
@@ -0,0 +1,65 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Example documents utilizing the CurrencyField type -->
+<add>
+<doc>
+  <field name="id">USD</field>
+  <field name="name">One Dollar</field>
+  <field name="manu">Bank of America</field>
+  <field name="manu_id_s">boa</field>
+  <field name="cat">currency</field>
+  <field name="features">Coins and notes</field>
+  <field name="price_c">1,USD</field>
+  <field name="inStock">true</field>
+</doc>
+
+<doc>
+  <field name="id">EUR</field>
+  <field name="name">One Euro</field>
+  <field name="manu">European Union</field>
+  <field name="manu_id_s">eu</field>
+  <field name="cat">currency</field>
+  <field name="features">Coins and notes</field>
+  <field name="price_c">1,EUR</field>
+  <field name="inStock">true</field>
+</doc>
+
+<doc>
+  <field name="id">GBP</field>
+  <field name="name">One British Pound</field>
+  <field name="manu">U.K.</field>
+  <field name="manu_id_s">uk</field>
+  <field name="cat">currency</field>
+  <field name="features">Coins and notes</field>
+  <field name="price_c">1,GBP</field>
+  <field name="inStock">true</field>
+</doc>
+
+<doc>
+  <field name="id">NOK</field>
+  <field name="name">One Krone</field>
+  <field name="manu">Bank of Norway</field>
+  <field name="manu_id_s">nor</field>
+  <field name="cat">currency</field>
+  <field name="features">Coins and notes</field>
+  <field name="price_c">1,NOK</field>
+  <field name="inStock">true</field>
+</doc>
+
+</add>
+
diff --git a/zookeeper/exampledocs/monitor.xml b/zookeeper/exampledocs/monitor.xml

new file mode 100644 (file)

index 0000000..db986fa
--- /dev/null
+++ b/zookeeper/exampledocs/monitor.xml
@@ -0,0 +1,35 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<add><doc>
+  <field name="id">3007WFP</field>
+  <field name="name">Dell Widescreen UltraSharp 3007WFP</field>
+  <field name="manu">Dell, Inc.</field>
+  <!-- Join -->
+  <field name="manu_id_s">dell</field>
+  <field name="cat">electronics</field>
+  <field name="cat">monitor</field>
+  <field name="features">30" TFT active matrix LCD, 2560 x 1600, .25mm dot pitch, 700:1 contrast</field>
+  <field name="includes">USB cable</field>
+  <field name="weight">401.6</field>
+  <field name="price">2199</field>
+  <field name="popularity">6</field>
+  <field name="inStock">true</field>
+  <!-- Buffalo store -->
+  <field name="store">43.17614,-90.57341</field>
+</doc></add>
+
diff --git a/zookeeper/exampledocs/monitor2.xml b/zookeeper/exampledocs/monitor2.xml

new file mode 100644 (file)

index 0000000..79b9949
--- /dev/null
+++ b/zookeeper/exampledocs/monitor2.xml
@@ -0,0 +1,34 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<add><doc>
+  <field name="id">VA902B</field>
+  <field name="name">ViewSonic VA902B - flat panel display - TFT - 19"</field>
+  <field name="manu">ViewSonic Corp.</field>
+  <!-- Join -->
+  <field name="manu_id_s">viewsonic</field>
+  <field name="cat">electronics</field>
+  <field name="cat">monitor</field>
+  <field name="features">19" TFT active matrix LCD, 8ms response time, 1280 x 1024 native resolution</field>
+  <field name="weight">190.4</field>
+  <field name="price">279.95</field>
+  <field name="popularity">6</field>
+  <field name="inStock">true</field>
+  <!-- Buffalo store -->
+  <field name="store">45.18814,-93.88541</field>
+</doc></add>
+
diff --git a/zookeeper/exampledocs/mp500.xml b/zookeeper/exampledocs/mp500.xml

new file mode 100644 (file)

index 0000000..bab401a
--- /dev/null
+++ b/zookeeper/exampledocs/mp500.xml
@@ -0,0 +1,43 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<add><doc>
+  <field name="id">0579B002</field>
+  <field name="name">Canon PIXMA MP500 All-In-One Photo Printer</field>
+  <field name="manu">Canon Inc.</field>
+  <!-- Join -->
+  <field name="manu_id_s">canon</field>
+  <field name="cat">electronics</field>
+  <field name="cat">multifunction printer</field>
+  <field name="cat">printer</field>
+  <field name="cat">scanner</field>
+  <field name="cat">copier</field>
+  <field name="features">Multifunction ink-jet color photo printer</field>
+  <field name="features">Flatbed scanner, optical scan resolution of 1,200 x 2,400 dpi</field>
+  <field name="features">2.5" color LCD preview screen</field>
+  <field name="features">Duplex Copying</field>
+  <field name="features">Printing speed up to 29ppm black, 19ppm color</field>
+  <field name="features">Hi-Speed USB</field>
+  <field name="features">memory card: CompactFlash, Micro Drive, SmartMedia, Memory Stick, Memory Stick Pro, SD Card, and MultiMediaCard</field>
+  <field name="weight">352</field>
+  <field name="price">179.99</field>
+  <field name="popularity">6</field>
+  <field name="inStock">true</field>
+  <!-- Buffalo store -->
+  <field name="store">45.19214,-93.89941</field>
+</doc></add>
+
diff --git a/zookeeper/exampledocs/post.jar b/zookeeper/exampledocs/post.jar

new file mode 100644 (file)

index 0000000..0042a46

Binary files /dev/null and b/zookeeper/exampledocs/post.jar differ
diff --git a/zookeeper/exampledocs/post.sh b/zookeeper/exampledocs/post.sh

new file mode 100755 (executable)

index 0000000..113884d
--- /dev/null
+++ b/zookeeper/exampledocs/post.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FILES=$*
+URL=http://localhost:8983/solr/update
+
+for f in $FILES; do
+  echo Posting file $f to $URL
+  curl $URL --data-binary @$f -H 'Content-type:application/xml' 
+  echo
+done
+
+#send the commit command to make sure all the changes are flushed and visible
+#curl $URL --data-binary '<commit softCommit=true/>' -H 'Content-type:application/xml'
+
+curl "$URL?softCommit=true"
+echo
diff --git a/zookeeper/exampledocs/sd500.xml b/zookeeper/exampledocs/sd500.xml

new file mode 100644 (file)

index 0000000..145c6fd
--- /dev/null
+++ b/zookeeper/exampledocs/sd500.xml
@@ -0,0 +1,38 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<add><doc>
+  <field name="id">9885A004</field>
+  <field name="name">Canon PowerShot SD500</field>
+  <field name="manu">Canon Inc.</field>
+  <!-- Join -->
+  <field name="manu_id_s">canon</field>
+  <field name="cat">electronics</field>
+  <field name="cat">camera</field>
+  <field name="features">3x zoom, 7.1 megapixel Digital ELPH</field>
+  <field name="features">movie clips up to 640x480 @30 fps</field>
+  <field name="features">2.0" TFT LCD, 118,000 pixels</field>
+  <field name="features">built in flash, red-eye reduction</field>
+  <field name="includes">32MB SD card, USB cable, AV cable, battery</field>
+  <field name="weight">6.4</field>
+  <field name="price">329.95</field>
+  <field name="popularity">7</field>
+  <field name="inStock">true</field>
+  <field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
+  <!-- Buffalo store -->
+  <field name="store">45.19614,-93.90341</field>
+</doc></add>
diff --git a/zookeeper/exampledocs/solr.xml b/zookeeper/exampledocs/solr.xml

new file mode 100644 (file)

index 0000000..410e5f7
--- /dev/null
+++ b/zookeeper/exampledocs/solr.xml
@@ -0,0 +1,38 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<add>
+<doc>
+  <field name="id">SOLR1000</field>
+  <field name="name">Solr, the Enterprise Search Server</field>
+  <field name="manu">Apache Software Foundation</field>
+  <field name="cat">software</field>
+  <field name="cat">search</field>
+  <field name="features">Advanced Full-Text Search Capabilities using Lucene</field>
+  <field name="features">Optimized for High Volume Web Traffic</field>
+  <field name="features">Standards Based Open Interfaces - XML and HTTP</field>
+  <field name="features">Comprehensive HTML Administration Interfaces</field>
+  <field name="features">Scalability - Efficient Replication to other Solr Search Servers</field>
+  <field name="features">Flexible and Adaptable with XML configuration and Schema</field>
+  <field name="features">Good unicode support: h&#xE9;llo (hello with an accent over the e)</field>
+  <field name="price">0</field>
+  <field name="popularity">10</field>
+  <field name="inStock">true</field>
+  <field name="incubationdate_dt">2006-01-17T00:00:00.000Z</field>
+</doc>
+</add>
+
diff --git a/zookeeper/exampledocs/test_utf8.sh b/zookeeper/exampledocs/test_utf8.sh

new file mode 100755 (executable)

index 0000000..edfd972
--- /dev/null
+++ b/zookeeper/exampledocs/test_utf8.sh
@@ -0,0 +1,93 @@
+#!/bin/sh
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#Test script to tell if the server is accepting UTF-8
+#The python writer currently escapes non-ascii chars, so it's good for testing
+
+URL=http://localhost:8983/solr
+
+if [ ! -z $1 ]; then
+  URL=$1
+fi
+
+curl "$URL/select?q=hello&params=explicit&wt=python" 2> /dev/null | grep 'hello' > /dev/null 2>&1
+if [ $? = 0 ]; then
+  echo "Solr server is up."
+else
+  echo "ERROR: Could not curl to Solr - is curl installed? Is Solr not running?"
+  exit 1
+fi
+
+curl "$URL/select?q=h%C3%A9llo&echoParams=explicit&wt=python" 2> /dev/null | grep 'h\\u00e9llo' > /dev/null 2>&1
+if [ $? = 0 ]; then
+  echo "HTTP GET is accepting UTF-8"
+else
+  echo "ERROR: HTTP GET is not accepting UTF-8"
+fi
+
+curl $URL/select --data-binary 'q=h%C3%A9llo&echoParams=explicit&wt=python' -H 'Content-type:application/x-www-form-urlencoded; charset=UTF-8' 2> /dev/null | grep 'h\\u00e9llo' > /dev/null 2>&1
+if [ $? = 0 ]; then
+  echo "HTTP POST is accepting UTF-8"
+else
+  echo "ERROR: HTTP POST is not accepting UTF-8"
+fi
+
+curl $URL/select --data-binary 'q=h%C3%A9llo&echoParams=explicit&wt=python' 2> /dev/null | grep 'h\\u00e9llo' > /dev/null 2>&1
+if [ $? = 0 ]; then
+  echo "HTTP POST defaults to UTF-8"
+else
+  echo "HTTP POST does not default to UTF-8"
+fi
+
+
+#A unicode character outside of the BMP (a circle with an x inside)
+CHAR="𐌈"
+CODEPOINT='0x10308'
+#URL encoded UTF8 of the codepoint
+URL_UTF8='%F0%90%8C%88'
+#expected return of the python writer (currently uses UTF-16 surrogates)
+EXPECTED='\\ud800\\udf08'
+
+curl "$URL/select?q=$URL_UTF8&echoParams=explicit&wt=python" 2> /dev/null | grep $EXPECTED > /dev/null 2>&1
+if [ $? = 0 ]; then
+  echo "HTTP GET is accepting UTF-8 beyond the basic multilingual plane"
+else
+  echo "ERROR: HTTP GET is not accepting UTF-8 beyond the basic multilingual plane"
+fi
+
+curl $URL/select --data-binary "q=$URL_UTF8&echoParams=explicit&wt=python"  -H 'Content-type:application/x-www-form-urlencoded; charset=UTF-8' 2> /dev/null | grep $EXPECTED > /dev/null 2>&1
+if [ $? = 0 ]; then
+  echo "HTTP POST is accepting UTF-8 beyond the basic multilingual plane"
+else
+  echo "ERROR: HTTP POST is not accepting UTF-8 beyond the basic multilingual plane"
+fi
+
+curl "$URL/select?q=$URL_UTF8&echoParams=explicit&wt=python" --data-binary '' 2> /dev/null | grep $EXPECTED > /dev/null 2>&1
+if [ $? = 0 ]; then
+  echo "HTTP POST + URL params is accepting UTF-8 beyond the basic multilingual plane"
+else
+  echo "ERROR: HTTP POST + URL params is not accepting UTF-8 beyond the basic multilingual plane"
+fi
+
+#curl "$URL/select?q=$URL_UTF8&echoParams=explicit&wt=json" 2> /dev/null | od -tx1 -w1000 | sed 's/ //g' | grep 'f4808198' > /dev/null 2>&1
+curl "$URL/select?q=$URL_UTF8&echoParams=explicit&wt=json" 2> /dev/null | grep "$CHAR" > /dev/null 2>&1
+if [ $? = 0 ]; then
+  echo "Response correctly returns UTF-8 beyond the basic multilingual plane"
+else
+  echo "ERROR: Response can't return UTF-8 beyond the basic multilingual plane"
+fi
+
+
diff --git a/zookeeper/exampledocs/utf8-example.xml b/zookeeper/exampledocs/utf8-example.xml

new file mode 100644 (file)

index 0000000..c9486b2
--- /dev/null
+++ b/zookeeper/exampledocs/utf8-example.xml
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- 
+  After posting this to SOLR with post.sh, searching for "êâîôû" from 
+  the solr/admin/ search page must return this document.
+ -->
+
+<add>
+  <doc>
+    <field name="id">UTF8TEST</field>
+    <field name="name">Test with some UTF-8 encoded characters</field>
+    <field name="manu">Apache Software Foundation</field>
+    <field name="cat">software</field>
+    <field name="cat">search</field>
+    <field name="features">No accents here</field>
+    <field name="features">This is an e acute: é</field>
+    <field name="features">eaiou with circumflexes: êâîôû</field>
+    <field name="features">eaiou with umlauts: ëäïöü</field>
+    <field name="features">tag with escaped chars: &lt;nicetag/&gt;</field>
+    <field name="features">escaped ampersand: Bonnie &amp; Clyde</field>
+    <field name="features">Outside the BMP:𐌈 codepoint=10308, a circle with an x inside. UTF8=f0908c88 UTF16=d800 df08</field>
+    <field name="price">0</field>
+    <field name="inStock">true</field>
+  </doc>
+</add>
+
diff --git a/zookeeper/exampledocs/vidcard.xml b/zookeeper/exampledocs/vidcard.xml

new file mode 100644 (file)

index 0000000..10b8121
--- /dev/null
+++ b/zookeeper/exampledocs/vidcard.xml
@@ -0,0 +1,62 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<add>
+<doc>
+  <field name="id">EN7800GTX/2DHTV/256M</field>
+  <field name="name">ASUS Extreme N7800GTX/2DHTV (256 MB)</field>
+  <!-- Denormalized -->
+  <field name="manu">ASUS Computer Inc.</field>
+  <!-- Join -->
+  <field name="manu_id_s">asus</field>
+  <field name="cat">electronics</field>
+  <field name="cat">graphics card</field>
+  <field name="features">NVIDIA GeForce 7800 GTX GPU/VPU clocked at 486MHz</field>
+  <field name="features">256MB GDDR3 Memory clocked at 1.35GHz</field>
+  <field name="features">PCI Express x16</field>
+  <field name="features">Dual DVI connectors, HDTV out, video input</field>
+  <field name="features">OpenGL 2.0, DirectX 9.0</field>
+  <field name="weight">16</field>
+  <field name="price">479.95</field>
+  <field name="popularity">7</field>
+  <field name="store">40.7143,-74.006</field>
+  <field name="inStock">false</field>
+  <field name="manufacturedate_dt">2006-02-13T15:26:37Z/DAY</field>
+</doc>
+  <!-- yes, you can add more than one document at a time -->
+<doc>
+  <field name="id">100-435805</field>
+  <field name="name">ATI Radeon X1900 XTX 512 MB PCIE Video Card</field>
+  <field name="manu">ATI Technologies</field>
+  <!-- Join -->
+  <field name="manu_id_s">ati</field>
+  <field name="cat">electronics</field>
+  <field name="cat">graphics card</field>
+  <field name="features">ATI RADEON X1900 GPU/VPU clocked at 650MHz</field>
+  <field name="features">512MB GDDR3 SDRAM clocked at 1.55GHz</field>
+  <field name="features">PCI Express x16</field>
+  <field name="features">dual DVI, HDTV, svideo, composite out</field>
+  <field name="features">OpenGL 2.0, DirectX 9.0</field>
+  <field name="weight">48</field>
+  <field name="price">649.99</field>
+  <field name="popularity">7</field>
+  <field name="inStock">false</field>
+  <field name="manufacturedate_dt">2006-02-13T15:26:37Z/DAY</field>
+  <!-- NYC store -->
+  <field name="store">40.7143,-74.006</field>
+</doc>
+</add>
diff --git a/zookeeper/lib/ext/jcl-over-slf4j-1.6.6.jar b/zookeeper/lib/ext/jcl-over-slf4j-1.6.6.jar

new file mode 100644 (file)

index 0000000..ab898c0

Binary files /dev/null and b/zookeeper/lib/ext/jcl-over-slf4j-1.6.6.jar differ
diff --git a/zookeeper/lib/ext/jul-to-slf4j-1.6.6.jar b/zookeeper/lib/ext/jul-to-slf4j-1.6.6.jar

new file mode 100644 (file)

index 0000000..fa8640f

Binary files /dev/null and b/zookeeper/lib/ext/jul-to-slf4j-1.6.6.jar differ
diff --git a/zookeeper/lib/ext/log4j-1.2.16.jar b/zookeeper/lib/ext/log4j-1.2.16.jar

new file mode 100644 (file)

index 0000000..5429a90

Binary files /dev/null and b/zookeeper/lib/ext/log4j-1.2.16.jar differ
diff --git a/zookeeper/lib/ext/slf4j-api-1.6.6.jar b/zookeeper/lib/ext/slf4j-api-1.6.6.jar

new file mode 100644 (file)

index 0000000..4c03fa6

Binary files /dev/null and b/zookeeper/lib/ext/slf4j-api-1.6.6.jar differ
diff --git a/zookeeper/lib/ext/slf4j-log4j12-1.6.6.jar b/zookeeper/lib/ext/slf4j-log4j12-1.6.6.jar

new file mode 100644 (file)

index 0000000..e72c2d6

Binary files /dev/null and b/zookeeper/lib/ext/slf4j-log4j12-1.6.6.jar differ
diff --git a/zookeeper/lib/jetty-continuation-8.1.10.v20130312.jar b/zookeeper/lib/jetty-continuation-8.1.10.v20130312.jar

new file mode 100644 (file)

index 0000000..c19fda2

Binary files /dev/null and b/zookeeper/lib/jetty-continuation-8.1.10.v20130312.jar differ
diff --git a/zookeeper/lib/jetty-deploy-8.1.10.v20130312.jar b/zookeeper/lib/jetty-deploy-8.1.10.v20130312.jar

new file mode 100644 (file)

index 0000000..986513b

Binary files /dev/null and b/zookeeper/lib/jetty-deploy-8.1.10.v20130312.jar differ
diff --git a/zookeeper/lib/jetty-http-8.1.10.v20130312.jar b/zookeeper/lib/jetty-http-8.1.10.v20130312.jar

new file mode 100644 (file)

index 0000000..e0fecc5

Binary files /dev/null and b/zookeeper/lib/jetty-http-8.1.10.v20130312.jar differ
diff --git a/zookeeper/lib/jetty-io-8.1.10.v20130312.jar b/zookeeper/lib/jetty-io-8.1.10.v20130312.jar

new file mode 100644 (file)

index 0000000..e686933

Binary files /dev/null and b/zookeeper/lib/jetty-io-8.1.10.v20130312.jar differ
diff --git a/zookeeper/lib/jetty-jmx-8.1.10.v20130312.jar b/zookeeper/lib/jetty-jmx-8.1.10.v20130312.jar

new file mode 100644 (file)

index 0000000..568afb3

Binary files /dev/null and b/zookeeper/lib/jetty-jmx-8.1.10.v20130312.jar differ
diff --git a/zookeeper/lib/jetty-security-8.1.10.v20130312.jar b/zookeeper/lib/jetty-security-8.1.10.v20130312.jar

new file mode 100644 (file)

index 0000000..4a3054e

Binary files /dev/null and b/zookeeper/lib/jetty-security-8.1.10.v20130312.jar differ
diff --git a/zookeeper/lib/jetty-server-8.1.10.v20130312.jar b/zookeeper/lib/jetty-server-8.1.10.v20130312.jar

new file mode 100644 (file)

index 0000000..e563ec5

Binary files /dev/null and b/zookeeper/lib/jetty-server-8.1.10.v20130312.jar differ
diff --git a/zookeeper/lib/jetty-servlet-8.1.10.v20130312.jar b/zookeeper/lib/jetty-servlet-8.1.10.v20130312.jar

new file mode 100644 (file)

index 0000000..1f13d52

Binary files /dev/null and b/zookeeper/lib/jetty-servlet-8.1.10.v20130312.jar differ
diff --git a/zookeeper/lib/jetty-util-8.1.10.v20130312.jar b/zookeeper/lib/jetty-util-8.1.10.v20130312.jar

new file mode 100644 (file)

index 0000000..018b2ea

Binary files /dev/null and b/zookeeper/lib/jetty-util-8.1.10.v20130312.jar differ
diff --git a/zookeeper/lib/jetty-webapp-8.1.10.v20130312.jar b/zookeeper/lib/jetty-webapp-8.1.10.v20130312.jar

new file mode 100644 (file)

index 0000000..c47b968

Binary files /dev/null and b/zookeeper/lib/jetty-webapp-8.1.10.v20130312.jar differ
diff --git a/zookeeper/lib/jetty-xml-8.1.10.v20130312.jar b/zookeeper/lib/jetty-xml-8.1.10.v20130312.jar

new file mode 100644 (file)

index 0000000..039702f

Binary files /dev/null and b/zookeeper/lib/jetty-xml-8.1.10.v20130312.jar differ
diff --git a/zookeeper/lib/servlet-api-3.0.jar b/zookeeper/lib/servlet-api-3.0.jar

new file mode 100644 (file)

index 0000000..b135409

Binary files /dev/null and b/zookeeper/lib/servlet-api-3.0.jar differ
diff --git a/zookeeper/multicore/README.txt b/zookeeper/multicore/README.txt

new file mode 100644 (file)

index 0000000..eba1457
--- /dev/null
+++ b/zookeeper/multicore/README.txt
@@ -0,0 +1,7 @@
+This is an alternative setup structure to support multiple cores.
+
+To run this configuration, start jetty in the example/ directory using:
+
+java -Dsolr.solr.home=multicore -jar start.jar
+
+For general examples on standard solr configuration, see the "solr" directory.
diff --git a/zookeeper/multicore/core0/conf/schema.xml b/zookeeper/multicore/core0/conf/schema.xml

new file mode 100644 (file)

index 0000000..7401b5b
--- /dev/null
+++ b/zookeeper/multicore/core0/conf/schema.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="example core zero" version="1.1">
+  <types>
+   <fieldtype name="string"  class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
+   <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
+  </types>
+
+ <fields>   
+  <!-- general -->
+  <field name="id"        type="string"   indexed="true"  stored="true"  multiValued="false" required="true"/>
+  <field name="type"      type="string"   indexed="true"  stored="true"  multiValued="false" /> 
+  <field name="name"      type="string"   indexed="true"  stored="true"  multiValued="false" /> 
+  <field name="core0"     type="string"   indexed="true"  stored="true"  multiValued="false" /> 
+  <field name="_version_" type="long"     indexed="true"  stored="true"/>
+ </fields>
+
+ <!-- field to use to determine and enforce document uniqueness. -->
+ <uniqueKey>id</uniqueKey>
+
+ <!-- field for the QueryParser to use when an explicit fieldname is absent -->
+ <defaultSearchField>name</defaultSearchField>
+
+ <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
+ <solrQueryParser defaultOperator="OR"/>
+</schema>
+
diff --git a/zookeeper/multicore/core0/conf/solrconfig.xml b/zookeeper/multicore/core0/conf/solrconfig.xml

new file mode 100644 (file)

index 0000000..1eb29c6
--- /dev/null
+++ b/zookeeper/multicore/core0/conf/solrconfig.xml
@@ -0,0 +1,95 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+ This is a stripped down config file used for a simple example...  
+ It is *not* a good example to work from. 
+-->
+<config>
+  <luceneMatchVersion>4.4</luceneMatchVersion>
+  <!--  The DirectoryFactory to use for indexes.
+        solr.StandardDirectoryFactory, the default, is filesystem based.
+        solr.RAMDirectoryFactory is memory based, not persistent, and doesn't work with replication. -->
+  <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>
+
+  <dataDir>${solr.core0.data.dir:}</dataDir>
+
+  <!-- To enable dynamic schema REST APIs, use the following for <schemaFactory>:
+  
+       <schemaFactory class="ManagedIndexSchemaFactory">
+         <bool name="mutable">true</bool>
+         <str name="managedSchemaResourceName">managed-schema</str>
+       </schemaFactory>
+       
+       When ManagedIndexSchemaFactory is specified, Solr will load the schema from
+       the resource named in 'managedSchemaResourceName', rather than from schema.xml.
+       Note that the managed schema resource CANNOT be named schema.xml.  If the managed
+       schema does not exist, Solr will create it after reading schema.xml, then rename
+       'schema.xml' to 'schema.xml.bak'. 
+       
+       Do NOT hand edit the managed schema - external modifications will be ignored and
+       overwritten as a result of schema modification REST API calls.
+
+       When ManagedIndexSchemaFactory is specified with mutable = true, schema
+       modification REST API calls will be allowed; otherwise, error responses will be
+       sent back for these requests. 
+  -->
+  <schemaFactory class="ClassicIndexSchemaFactory"/>
+
+  <updateHandler class="solr.DirectUpdateHandler2">
+    <updateLog>
+      <str name="dir">${solr.core0.data.dir:}</str>
+    </updateLog>
+  </updateHandler>
+
+  <!-- realtime get handler, guaranteed to return the latest stored fields 
+    of any document, without the need to commit or open a new searcher. The current 
+    implementation relies on the updateLog feature being enabled. -->
+  <requestHandler name="/get" class="solr.RealTimeGetHandler">
+    <lst name="defaults">
+      <str name="omitHeader">true</str>
+    </lst>
+  </requestHandler>  
+  
+  <requestHandler name="/replication" class="solr.ReplicationHandler" startup="lazy" /> 
+
+  <requestDispatcher handleSelect="true" >
+    <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" formdataUploadLimitInKB="2048" />
+  </requestDispatcher>
+  
+  <requestHandler name="standard" class="solr.StandardRequestHandler" default="true" />
+  <requestHandler name="/analysis/field" startup="lazy" class="solr.FieldAnalysisRequestHandler" />
+  <requestHandler name="/update" class="solr.UpdateRequestHandler"  />
+  <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />
+     
+  <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
+    <lst name="invariants">
+      <str name="q">solrpingquery</str>
+    </lst>
+    <lst name="defaults">
+      <str name="echoParams">all</str>
+    </lst>
+  </requestHandler>
+   
+  <!-- config for the admin interface --> 
+  <admin>
+    <defaultQuery>solr</defaultQuery>
+  </admin>
+
+</config>
+
diff --git a/zookeeper/multicore/core1/conf/schema.xml b/zookeeper/multicore/core1/conf/schema.xml

new file mode 100644 (file)

index 0000000..5a27d39
--- /dev/null
+++ b/zookeeper/multicore/core1/conf/schema.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="example core one" version="1.1">
+  <types>
+   <fieldtype name="string"  class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
+   <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
+  </types>
+
+ <fields>   
+  <!-- general -->
+  <field name="id"        type="string"    indexed="true"  stored="true"  multiValued="false" required="true"/>
+  <field name="type"      type="string"    indexed="true"  stored="true"  multiValued="false" /> 
+  <field name="name"      type="string"    indexed="true"  stored="true"  multiValued="false" /> 
+  <field name="core1"     type="string"    indexed="true"  stored="true"  multiValued="false" />
+  <field name="_version_" type="long"      indexed="true"  stored="true"/>
+ </fields>
+
+ <!-- field to use to determine and enforce document uniqueness. -->
+ <uniqueKey>id</uniqueKey>
+
+ <!-- field for the QueryParser to use when an explicit fieldname is absent -->
+ <defaultSearchField>name</defaultSearchField>
+
+ <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
+ <solrQueryParser defaultOperator="OR"/>
+</schema>
+
diff --git a/zookeeper/multicore/core1/conf/solrconfig.xml b/zookeeper/multicore/core1/conf/solrconfig.xml

new file mode 100644 (file)

index 0000000..c0aff09
--- /dev/null
+++ b/zookeeper/multicore/core1/conf/solrconfig.xml
@@ -0,0 +1,95 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+ This is a stripped down config file used for a simple example...  
+ It is *not* a good example to work from. 
+-->
+<config>
+  <luceneMatchVersion>4.4</luceneMatchVersion>
+  <!--  The DirectoryFactory to use for indexes.
+        solr.StandardDirectoryFactory, the default, is filesystem based.
+        solr.RAMDirectoryFactory is memory based, not persistent, and doesn't work with replication. -->
+  <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>
+
+  <dataDir>${solr.core1.data.dir:}</dataDir>
+
+  <!-- To enable dynamic schema REST APIs, use the following for <schemaFactory>:
+  
+       <schemaFactory class="ManagedIndexSchemaFactory">
+         <bool name="mutable">true</bool>
+         <str name="managedSchemaResourceName">managed-schema</str>
+       </schemaFactory>
+       
+       When ManagedIndexSchemaFactory is specified, Solr will load the schema from
+       the resource named in 'managedSchemaResourceName', rather than from schema.xml.
+       Note that the managed schema resource CANNOT be named schema.xml.  If the managed
+       schema does not exist, Solr will create it after reading schema.xml, then rename
+       'schema.xml' to 'schema.xml.bak'. 
+       
+       Do NOT hand edit the managed schema - external modifications will be ignored and
+       overwritten as a result of schema modification REST API calls.
+
+       When ManagedIndexSchemaFactory is specified with mutable = true, schema
+       modification REST API calls will be allowed; otherwise, error responses will be
+       sent back for these requests. 
+  -->
+  <schemaFactory class="ClassicIndexSchemaFactory"/>
+
+  <updateHandler class="solr.DirectUpdateHandler2">
+    <updateLog>
+      <str name="dir">${solr.core1.data.dir:}</str>
+    </updateLog>
+  </updateHandler>
+
+  <!-- realtime get handler, guaranteed to return the latest stored fields 
+    of any document, without the need to commit or open a new searcher. The current 
+    implementation relies on the updateLog feature being enabled. -->
+  <requestHandler name="/get" class="solr.RealTimeGetHandler">
+    <lst name="defaults">
+      <str name="omitHeader">true</str>
+    </lst>
+  </requestHandler>
+  
+  <requestHandler name="/replication" class="solr.ReplicationHandler" startup="lazy" /> 
+
+  <requestDispatcher handleSelect="true" >
+    <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" formdataUploadLimitInKB="2048" />
+  </requestDispatcher>
+  
+  <requestHandler name="standard" class="solr.StandardRequestHandler" default="true" />
+  <requestHandler name="/analysis/field" startup="lazy" class="solr.FieldAnalysisRequestHandler" />
+  <requestHandler name="/update" class="solr.UpdateRequestHandler"  />
+  <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />
+
+  <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
+    <lst name="invariants">
+      <str name="q">solrpingquery</str>
+    </lst>
+    <lst name="defaults">
+      <str name="echoParams">all</str>
+    </lst>
+  </requestHandler>
+
+  <!-- config for the admin interface --> 
+  <admin>
+    <defaultQuery>solr</defaultQuery>
+  </admin>
+
+</config>
+
diff --git a/zookeeper/multicore/exampledocs/ipod_other.xml b/zookeeper/multicore/exampledocs/ipod_other.xml

new file mode 100644 (file)

index 0000000..4bfa310
--- /dev/null
+++ b/zookeeper/multicore/exampledocs/ipod_other.xml
@@ -0,0 +1,34 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<add>
+
+<doc>
+  <field name="id">F8V7067-APL-KIT</field>
+  <field name="name">Belkin Mobile Power Cord for iPod w/ Dock</field>
+</doc>
+
+<doc>
+  <field name="id">IW-02</field>
+  <field name="name">iPod &amp; iPod Mini USB 2.0 Cable</field>
+</doc>
+
+
+</add>
+
+
+
diff --git a/zookeeper/multicore/exampledocs/ipod_video.xml b/zookeeper/multicore/exampledocs/ipod_video.xml

new file mode 100644 (file)

index 0000000..3547fd3
--- /dev/null
+++ b/zookeeper/multicore/exampledocs/ipod_video.xml
@@ -0,0 +1,22 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<add><doc>
+  <field name="id">MA147LL/A</field>
+  <field name="name">Apple 60 GB iPod with Video Playback Black</field>
+
+</doc></add>
diff --git a/zookeeper/multicore/solr.xml b/zookeeper/multicore/solr.xml

new file mode 100644 (file)

index 0000000..2707901
--- /dev/null
+++ b/zookeeper/multicore/solr.xml
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+ All (relative) paths are relative to the installation path
+  
+  persistent: Save changes made via the API to this file
+  sharedLib: path to a lib directory that will be shared across all cores
+-->
+<solr persistent="false">
+
+  <!--
+  adminPath: RequestHandler path to manage cores.  
+    If 'null' (or absent), cores will not be manageable via request handler
+  -->
+  <cores adminPath="/admin/cores" host="${host:}" hostPort="${jetty.port:8983}" hostContext="${hostContext:solr}">
+    <core name="core0" instanceDir="core0" />
+    <core name="core1" instanceDir="core1" />
+  </cores>
+</solr>
diff --git a/zookeeper/multicore/zoo.cfg b/zookeeper/multicore/zoo.cfg

new file mode 100644 (file)

index 0000000..aea4518
--- /dev/null
+++ b/zookeeper/multicore/zoo.cfg
@@ -0,0 +1,17 @@
+# The number of milliseconds of each tick
+tickTime=2000
+# The number of ticks that the initial
+# synchronization phase can take
+initLimit=10
+# The number of ticks that can pass between
+# sending a request and getting an acknowledgement
+syncLimit=5
+
+# the directory where the snapshot is stored.
+# dataDir=/opt/zookeeper/data
+# NOTE: Solr defaults the dataDir to <solrHome>/zoo_data
+
+# the port at which the clients will connect
+# clientPort=2181
+# NOTE: Solr sets this based on zkRun / zkHost params
+
diff --git a/zookeeper/options b/zookeeper/options

new file mode 100644 (file)

index 0000000..f31b9ec
--- /dev/null
+++ b/zookeeper/options
@@ -0,0 +1 @@
+OPTIONS=" -Dbootstrap_confdir=./solr/collection1/conf -Dcollection.configName=myconf -DzkRun -DzkHost=opencontent-solr.index:9983 -DnumShards=2 "
\ No newline at end of file
diff --git a/zookeeper/options_2 b/zookeeper/options_2

new file mode 100644 (file)

index 0000000..bb3930f
--- /dev/null
+++ b/zookeeper/options_2
@@ -0,0 +1,4 @@
+OPTIONS="-Djetty.port=7500 -DzkHost=opencontent-solr.index:9983" 
+NAME=second
+PID_FILE="./${NAME}.pid"
+LOG_FILE="./${NAME}.log"
diff --git a/zookeeper/resources/log4j.properties b/zookeeper/resources/log4j.properties

new file mode 100644 (file)

index 0000000..f33fa71
--- /dev/null
+++ b/zookeeper/resources/log4j.properties
@@ -0,0 +1,24 @@
+#  Logging level
+solr.log=logs/
+log4j.rootLogger=INFO, file, CONSOLE
+
+log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
+
+log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
+log4j.appender.CONSOLE.layout.ConversionPattern=%-4r [%t] %-5p %c %x \u2013 %m%n
+
+#- size rotation with log cleanup.
+log4j.appender.file=org.apache.log4j.RollingFileAppender
+log4j.appender.file.MaxFileSize=4MB
+log4j.appender.file.MaxBackupIndex=9
+
+#- File to log to and log format
+log4j.appender.file.File=${solr.log}/solr.log
+log4j.appender.file.layout=org.apache.log4j.PatternLayout
+log4j.appender.file.layout.ConversionPattern=%-5p - %d{yyyy-MM-dd HH:mm:ss.SSS}; %C; %m\n
+
+log4j.logger.org.apache.zookeeper=WARN
+log4j.logger.org.apache.hadoop=WARN
+
+# set to INFO to enable infostream log messages
+log4j.logger.org.apache.solr.update.LoggingInfoStream=OFF
diff --git a/zookeeper/solr/README.txt b/zookeeper/solr/README.txt

new file mode 100644 (file)

index 0000000..64d7c41
--- /dev/null
+++ b/zookeeper/solr/README.txt
@@ -0,0 +1,63 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+Example Solr Home Directory
+=============================
+
+This directory is provided as an example of what a "Solr Home" directory
+should look like.
+
+It's not strictly necessary that you copy all of the files in this
+directory when setting up a new instance of Solr, but it is recommended.
+
+
+Basic Directory Structure
+-------------------------
+
+The Solr Home directory typically contains the following...
+
+* solr.xml *
+
+This is the primary configuration file Solr looks for when starting.
+This file specifies the list of "SolrCores" it should load, and high 
+level configuration options that should be used for all SolrCores.
+
+Please see the comments in ./solr.xml for more details.
+
+If no solr.xml file is found, then Solr assumes that there should be
+a single SolrCore named "collection1" and that the "Instance Directory" 
+for collection1 should be the same as the Solr Home Directory.
+
+* Individual SolrCore Instance Directories *
+
+Although solr.xml can be configured to look for SolrCore Instance Directories 
+in any path, simple sub-directories of the Solr Home Dir using relative paths 
+are common for many installations.  In this directory you can see the 
+"./collection1" Instance Directory.
+
+* A Shared 'lib' Directory *
+
+Although solr.xml can be configured with an optional "sharedLib" attribute 
+that can point to any path, it is common to use a "./lib" sub-directory of the 
+Solr Home Directory.
+
+* ZooKeeper Files *
+
+When running SolrCloud with the embedded ZooKeeper option for Solr, it is 
+common to have a "zoo.cfg" file and "zoo_data" directories in the Solr Home 
+Directory.  Please see the SolrCloud wiki page for more details...
+
+https://wiki.apache.org/solr/SolrCloud
diff --git a/zookeeper/solr/collection1/README.txt b/zookeeper/solr/collection1/README.txt

new file mode 100644 (file)

index 0000000..337d55b
--- /dev/null
+++ b/zookeeper/solr/collection1/README.txt
@@ -0,0 +1,50 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+Example SolrCore Instance Directory
+=============================
+
+This directory is provided as an example of what an "Instance Directory"
+should look like for a SolrCore.
+
+It's not strictly necessary that you copy all of the files in this
+directory when setting up a new SolrCore, but it is recommended.
+
+
+Basic Directory Structure
+-------------------------
+
+The Instance Directory typically contains the following sub-directories...
+
+   conf/
+        This directory is mandatory and must contain your solrconfig.xml
+        and schema.xml.  Any other optional configuration files would also 
+        be kept here.
+
+   data/
+        This directory is the default location where Solr will keep your
+        index, and is used by the replication scripts for dealing with
+        snapshots.  You can override this location in the 
+        conf/solrconfig.xml.  Solr will create this directory if it does not 
+        already exist.
+
+   lib/
+        This directory is optional.  If it exists, Solr will load any Jars
+        found in this directory and use them to resolve any "plugins"
+        specified in your solrconfig.xml or schema.xml (ie: Analyzers,
+        Request Handlers, etc...).  Alternatively you can use the <lib>
+        syntax in conf/solrconfig.xml to direct Solr to your plugins.  See 
+        the example conf/solrconfig.xml file for details.
diff --git a/zookeeper/solr/collection1/conf/admin-extra.html b/zookeeper/solr/collection1/conf/admin-extra.html

new file mode 100644 (file)

index 0000000..fecab20
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/admin-extra.html
@@ -0,0 +1,24 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- The content of this page will be statically included into the top-
+right box of the cores overview page. Uncomment this as an example to 
+see where the content will show up.
+
+<img src="img/ico/construction.png"> This line will appear at the top-
+right box on collection1's Overview
+-->
diff --git a/zookeeper/solr/collection1/conf/admin-extra.menu-bottom.html b/zookeeper/solr/collection1/conf/admin-extra.menu-bottom.html

new file mode 100644 (file)

index 0000000..3359a46
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/admin-extra.menu-bottom.html
@@ -0,0 +1,25 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- admin-extra.menu-bottom.html -->
+<!--
+<li>
+  <a href="#" style="background-image: url(img/ico/construction.png);">
+    LAST ITEM
+  </a>
+</li>
+-->
diff --git a/zookeeper/solr/collection1/conf/admin-extra.menu-top.html b/zookeeper/solr/collection1/conf/admin-extra.menu-top.html

new file mode 100644 (file)

index 0000000..0886cee
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/admin-extra.menu-top.html
@@ -0,0 +1,25 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- admin-extra.menu-top.html -->
+<!--
+<li>
+  <a href="#" style="background-image: url(img/ico/construction.png);">
+    FIRST ITEM
+  </a>
+</li>
+-->
diff --git a/zookeeper/solr/collection1/conf/currency.xml b/zookeeper/solr/collection1/conf/currency.xml

new file mode 100644 (file)

index 0000000..3a9c58a
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/currency.xml
@@ -0,0 +1,67 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Example exchange rates file for CurrencyField type named "currency" in example schema -->
+
+<currencyConfig version="1.0">
+  <rates>
+    <!-- Updated from http://www.exchangerate.com/ at 2011-09-27 -->
+    <rate from="USD" to="ARS" rate="4.333871" comment="ARGENTINA Peso" />
+    <rate from="USD" to="AUD" rate="1.025768" comment="AUSTRALIA Dollar" />
+    <rate from="USD" to="EUR" rate="0.743676" comment="European Euro" />
+    <rate from="USD" to="BRL" rate="1.881093" comment="BRAZIL Real" />
+    <rate from="USD" to="CAD" rate="1.030815" comment="CANADA Dollar" />
+    <rate from="USD" to="CLP" rate="519.0996" comment="CHILE Peso" />
+    <rate from="USD" to="CNY" rate="6.387310" comment="CHINA Yuan" />
+    <rate from="USD" to="CZK" rate="18.47134" comment="CZECH REP. Koruna" />
+    <rate from="USD" to="DKK" rate="5.515436" comment="DENMARK Krone" />
+    <rate from="USD" to="HKD" rate="7.801922" comment="HONG KONG Dollar" />
+    <rate from="USD" to="HUF" rate="215.6169" comment="HUNGARY Forint" />
+    <rate from="USD" to="ISK" rate="118.1280" comment="ICELAND Krona" />
+    <rate from="USD" to="INR" rate="49.49088" comment="INDIA Rupee" />
+    <rate from="USD" to="XDR" rate="0.641358" comment="INTNL MON. FUND SDR" />
+    <rate from="USD" to="ILS" rate="3.709739" comment="ISRAEL Sheqel" />
+    <rate from="USD" to="JPY" rate="76.32419" comment="JAPAN Yen" />
+    <rate from="USD" to="KRW" rate="1169.173" comment="KOREA (SOUTH) Won" />
+    <rate from="USD" to="KWD" rate="0.275142" comment="KUWAIT Dinar" />
+    <rate from="USD" to="MXN" rate="13.85895" comment="MEXICO Peso" />
+    <rate from="USD" to="NZD" rate="1.285159" comment="NEW ZEALAND Dollar" />
+    <rate from="USD" to="NOK" rate="5.859035" comment="NORWAY Krone" />
+    <rate from="USD" to="PKR" rate="87.57007" comment="PAKISTAN Rupee" />
+    <rate from="USD" to="PEN" rate="2.730683" comment="PERU Sol" />
+    <rate from="USD" to="PHP" rate="43.62039" comment="PHILIPPINES Peso" />
+    <rate from="USD" to="PLN" rate="3.310139" comment="POLAND Zloty" />
+    <rate from="USD" to="RON" rate="3.100932" comment="ROMANIA Leu" />
+    <rate from="USD" to="RUB" rate="32.14663" comment="RUSSIA Ruble" />
+    <rate from="USD" to="SAR" rate="3.750465" comment="SAUDI ARABIA Riyal" />
+    <rate from="USD" to="SGD" rate="1.299352" comment="SINGAPORE Dollar" />
+    <rate from="USD" to="ZAR" rate="8.329761" comment="SOUTH AFRICA Rand" />
+    <rate from="USD" to="SEK" rate="6.883442" comment="SWEDEN Krona" />
+    <rate from="USD" to="CHF" rate="0.906035" comment="SWITZERLAND Franc" />
+    <rate from="USD" to="TWD" rate="30.40283" comment="TAIWAN Dollar" />
+    <rate from="USD" to="THB" rate="30.89487" comment="THAILAND Baht" />
+    <rate from="USD" to="AED" rate="3.672955" comment="U.A.E. Dirham" />
+    <rate from="USD" to="UAH" rate="7.988582" comment="UKRAINE Hryvnia" />
+    <rate from="USD" to="GBP" rate="0.647910" comment="UNITED KINGDOM Pound" />
+    
+    <!-- Cross-rates for some common currencies -->
+    <rate from="EUR" to="GBP" rate="0.869914" />  
+    <rate from="EUR" to="NOK" rate="7.800095" />  
+    <rate from="GBP" to="NOK" rate="8.966508" />  
+  </rates>
+</currencyConfig>
diff --git a/zookeeper/solr/collection1/conf/elevate.xml b/zookeeper/solr/collection1/conf/elevate.xml

new file mode 100644 (file)

index 0000000..25d5ceb
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/elevate.xml
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- If this file is found in the config directory, it will only be
+     loaded once at startup.  If it is found in Solr's data
+     directory, it will be re-loaded every commit.
+
+   See http://wiki.apache.org/solr/QueryElevationComponent for more info
+
+-->
+<elevate>
+ <query text="foo bar">
+  <doc id="1" />
+  <doc id="2" />
+  <doc id="3" />
+ </query>
+ 
+ <query text="ipod">
+   <doc id="MA147LL/A" />  <!-- put the actual ipod at the top -->
+   <doc id="IW-02" exclude="true" /> <!-- exclude this cable -->
+ </query>
+ 
+</elevate>
diff --git a/zookeeper/solr/collection1/conf/lang/contractions_ca.txt b/zookeeper/solr/collection1/conf/lang/contractions_ca.txt

new file mode 100644 (file)

index 0000000..307a85f
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/contractions_ca.txt
@@ -0,0 +1,8 @@
+# Set of Catalan contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+d
+l
+m
+n
+s
+t
diff --git a/zookeeper/solr/collection1/conf/lang/contractions_fr.txt b/zookeeper/solr/collection1/conf/lang/contractions_fr.txt

new file mode 100644 (file)

index 0000000..f1bba51
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/contractions_fr.txt
@@ -0,0 +1,15 @@
+# Set of French contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+l
+m
+t
+qu
+n
+s
+j
+d
+c
+jusqu
+quoiqu
+lorsqu
+puisqu
diff --git a/zookeeper/solr/collection1/conf/lang/contractions_ga.txt b/zookeeper/solr/collection1/conf/lang/contractions_ga.txt

new file mode 100644 (file)

index 0000000..9ebe7fa
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/contractions_ga.txt
@@ -0,0 +1,5 @@
+# Set of Irish contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+d
+m
+b
diff --git a/zookeeper/solr/collection1/conf/lang/contractions_it.txt b/zookeeper/solr/collection1/conf/lang/contractions_it.txt

new file mode 100644 (file)

index 0000000..cac0409
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/contractions_it.txt
@@ -0,0 +1,23 @@
+# Set of Italian contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+c
+l 
+all 
+dall 
+dell 
+nell 
+sull 
+coll 
+pell 
+gl 
+agl 
+dagl 
+degl 
+negl 
+sugl 
+un 
+m 
+t 
+s 
+v 
+d
diff --git a/zookeeper/solr/collection1/conf/lang/hyphenations_ga.txt b/zookeeper/solr/collection1/conf/lang/hyphenations_ga.txt

new file mode 100644 (file)

index 0000000..4d2642c
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/hyphenations_ga.txt
@@ -0,0 +1,5 @@
+# Set of Irish hyphenations for StopFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+h
+n
+t
diff --git a/zookeeper/solr/collection1/conf/lang/stemdict_nl.txt b/zookeeper/solr/collection1/conf/lang/stemdict_nl.txt

new file mode 100644 (file)

index 0000000..4410729
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stemdict_nl.txt
@@ -0,0 +1,6 @@
+# Set of overrides for the Dutch stemmer
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+fiets  fiets
+bromfiets      bromfiets
+ei     eier
+kind   kinder
diff --git a/zookeeper/solr/collection1/conf/lang/stoptags_ja.txt b/zookeeper/solr/collection1/conf/lang/stoptags_ja.txt

new file mode 100644 (file)

index 0000000..71b7508
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stoptags_ja.txt
@@ -0,0 +1,420 @@
+#
+# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
+#
+# Any token with a part-of-speech tag that exactly matches one defined in this
+# file is removed from the token stream.
+#
+# Set your own stoptags by uncommenting the lines below.  Note that comments are
+# not allowed on the same line as a stoptag.  See LUCENE-3745 for frequency lists,
+# etc. that can be useful for building your own stoptag set.
+#
+# The entire possible tagset is provided below for convenience.
+#
+#####
+#  noun: unclassified nouns
+#名詞
+#
+#  noun-common: Common nouns or nouns where the sub-classification is undefined
+#名詞-一般
+#
+#  noun-proper: Proper nouns where the sub-classification is undefined 
+#名詞-固有名詞
+#
+#  noun-proper-misc: miscellaneous proper nouns
+#名詞-固有名詞-一般
+#
+#  noun-proper-person: Personal names where the sub-classification is undefined
+#名詞-固有名詞-人名
+#
+#  noun-proper-person-misc: names that cannot be divided into surname and 
+#  given name; foreign names; names where the surname or given name is unknown.
+#  e.g. お市の方
+#名詞-固有名詞-人名-一般
+#
+#  noun-proper-person-surname: Mainly Japanese surnames.
+#  e.g. 山田
+#名詞-固有名詞-人名-姓
+#
+#  noun-proper-person-given_name: Mainly Japanese given names.
+#  e.g. 太郎
+#名詞-固有名詞-人名-名
+#
+#  noun-proper-organization: Names representing organizations.
+#  e.g. 通産省, NHK
+#名詞-固有名詞-組織
+#
+#  noun-proper-place: Place names where the sub-classification is undefined
+#名詞-固有名詞-地域
+#
+#  noun-proper-place-misc: Place names excluding countries.
+#  e.g. アジア, バルセロナ, 京都
+#名詞-固有名詞-地域-一般
+#
+#  noun-proper-place-country: Country names. 
+#  e.g. 日本, オーストラリア
+#名詞-固有名詞-地域-国
+#
+#  noun-pronoun: Pronouns where the sub-classification is undefined
+#名詞-代名詞
+#
+#  noun-pronoun-misc: miscellaneous pronouns: 
+#  e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ
+#名詞-代名詞-一般
+#
+#  noun-pronoun-contraction: Spoken language contraction made by combining a 
+#  pronoun and the particle 'wa'.
+#  e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ 
+#名詞-代名詞-縮約
+#
+#  noun-adverbial: Temporal nouns such as names of days or months that behave 
+#  like adverbs. Nouns that represent amount or ratios and can be used adverbially,
+#  e.g. 金曜, 一月, 午後, 少量
+#名詞-副詞可能
+#
+#  noun-verbal: Nouns that take arguments with case and can appear followed by 
+#  'suru' and related verbs (する, できる, なさる, くださる)
+#  e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り
+#名詞-サ変接続
+#
+#  noun-adjective-base: The base form of adjectives, words that appear before な ("na")
+#  e.g. 健康, 安易, 駄目, だめ
+#名詞-形容動詞語幹
+#
+#  noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数.
+#  e.g. 0, 1, 2, 何, 数, 幾
+#名詞-数
+#
+#  noun-affix: noun affixes where the sub-classification is undefined
+#名詞-非自立
+#
+#  noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that 
+#  attach to the base form of inflectional words, words that cannot be classified 
+#  into any of the other categories below. This category includes indefinite nouns.
+#  e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, 
+#       順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, 
+#       拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳,
+#       わり, 割り, 割, ん-口語/, もん-口語/
+#名詞-非自立-一般
+#
+#  noun-affix-adverbial: noun affixes that can behave as adverbs.
+#  e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, 
+#       上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, 
+#       最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, 
+#       とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, 
+#       儘, 侭, みぎり, 矢先
+#名詞-非自立-副詞可能
+#
+#  noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars 
+#  with the stem よう(だ) ("you(da)").
+#  e.g.  よう, やう, 様 (よう)
+#名詞-非自立-助動詞語幹
+#  
+#  noun-affix-adjective-base: noun affixes that can connect to the indeclinable
+#  connection form な (aux "da").
+#  e.g. みたい, ふう
+#名詞-非自立-形容動詞語幹
+#
+#  noun-special: special nouns where the sub-classification is undefined.
+#名詞-特殊
+#
+#  noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is 
+#  treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base 
+#  form of inflectional words.
+#  e.g. そう
+#名詞-特殊-助動詞語幹
+#
+#  noun-suffix: noun suffixes where the sub-classification is undefined.
+#名詞-接尾
+#
+#  noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect 
+#  to ガル or タイ and can combine into compound nouns, words that cannot be classified into
+#  any of the other categories below. In general, this category is more inclusive than 
+#  接尾語 ("suffix") and is usually the last element in a compound noun.
+#  e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (～した) さ, 次第, 済 (ず) み,
+#       よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用
+#名詞-接尾-一般
+#
+#  noun-suffix-person: Suffixes that form nouns and attach to person names more often
+#  than other nouns.
+#  e.g. 君, 様, 著
+#名詞-接尾-人名
+#
+#  noun-suffix-place: Suffixes that form nouns and attach to place names more often 
+#  than other nouns.
+#  e.g. 町, 市, 県
+#名詞-接尾-地域
+#
+#  noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that 
+#  can appear before スル ("suru").
+#  e.g. 化, 視, 分け, 入り, 落ち, 買い
+#名詞-接尾-サ変接続
+#
+#  noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, 
+#  is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the 
+#  conjunctive form of inflectional words.
+#  e.g. そう
+#名詞-接尾-助動詞語幹
+#
+#  noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive 
+#  form of inflectional words and appear before the copula だ ("da").
+#  e.g. 的, げ, がち
+#名詞-接尾-形容動詞語幹
+#
+#  noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
+#  e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ)
+#名詞-接尾-副詞可能
+#
+#  noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category 
+#  is more inclusive than 助数詞 ("classifier") and includes common nouns that attach 
+#  to numbers.
+#  e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半
+#名詞-接尾-助数詞
+#
+#  noun-suffix-special: Special suffixes that mainly attach to inflecting words.
+#  e.g. (楽し) さ, (考え) 方
+#名詞-接尾-特殊
+#
+#  noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words 
+#  together.
+#  e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦)
+#名詞-接続詞的
+#
+#  noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are 
+#  semantically verb-like.
+#  e.g. ごらん, ご覧, 御覧, 頂戴
+#名詞-動詞非自立的
+#
+#  noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, 
+#  dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") 
+#  is いわく ("iwaku").
+#名詞-引用文字列
+#
+#  noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and
+#  behave like an adjective.
+#  e.g. 申し訳, 仕方, とんでも, 違い
+#名詞-ナイ形容詞語幹
+#
+#####
+#  prefix: unclassified prefixes
+#接頭詞
+#
+#  prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) 
+#  excluding numerical expressions.
+#  e.g. お (水), 某 (氏), 同 (社), 故 (～氏), 高 (品質), お (見事), ご (立派)
+#接頭詞-名詞接続
+#
+#  prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
+#  in conjunctive form followed by なる/なさる/くださる.
+#  e.g. お (読みなさい), お (座り)
+#接頭詞-動詞接続
+#
+#  prefix-adjectival: Prefixes that attach to adjectives.
+#  e.g. お (寒いですねえ), バカ (でかい)
+#接頭詞-形容詞接続
+#
+#  prefix-numerical: Prefixes that attach to numerical expressions.
+#  e.g. 約, およそ, 毎時
+#接頭詞-数接続
+#
+#####
+#  verb: unclassified verbs
+#動詞
+#
+#  verb-main:
+#動詞-自立
+#
+#  verb-auxiliary:
+#動詞-非自立
+#
+#  verb-suffix:
+#動詞-接尾
+#
+#####
+#  adjective: unclassified adjectives
+#形容詞
+#
+#  adjective-main:
+#形容詞-自立
+#
+#  adjective-auxiliary:
+#形容詞-非自立
+#
+#  adjective-suffix:
+#形容詞-接尾
+#
+#####
+#  adverb: unclassified adverbs
+#副詞
+#
+#  adverb-misc: Words that can be segmented into one unit and where adnominal 
+#  modification is not possible.
+#  e.g. あいかわらず, 多分
+#副詞-一般
+#
+#  adverb-particle_conjunction: Adverbs that can be followed by の, は, に, 
+#  な, する, だ, etc.
+#  e.g. こんなに, そんなに, あんなに, なにか, なんでも
+#副詞-助詞類接続
+#
+#####
+#  adnominal: Words that only have noun-modifying forms.
+#  e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, 
+#       どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, 
+#       「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き
+#連体詞
+#
+#####
+#  conjunction: Conjunctions that can occur independently.
+#  e.g. が, けれども, そして, じゃあ, それどころか
+接続詞
+#
+#####
+#  particle: unclassified particles.
+助詞
+#
+#  particle-case: case particles where the subclassification is undefined.
+助詞-格助詞
+#
+#  particle-case-misc: Case particles.
+#  e.g. から, が, で, と, に, へ, より, を, の, にて
+助詞-格助詞-一般
+#
+#  particle-case-quote: the "to" that appears after nouns, a person’s speech, 
+#  quotation marks, expressions of decisions from a meeting, reasons, judgements,
+#  conjectures, etc.
+#  e.g. ( だ) と (述べた.), ( である) と (して執行猶予...)
+助詞-格助詞-引用
+#
+#  particle-case-compound: Compounds of particles and verbs that mainly behave 
+#  like case particles.
+#  e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って,
+#       にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, 
+#       にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, 
+#       に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, 
+#       に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって,
+#       にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, 
+#       にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる,
+#       って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ
+助詞-格助詞-連語
+#
+#  particle-conjunctive:
+#  e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, 
+#       ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, 
+#       (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/
+助詞-接続助詞
+#
+#  particle-dependency:
+#  e.g. こそ, さえ, しか, すら, は, も, ぞ
+助詞-係助詞
+#
+#  particle-adverbial:
+#  e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, 
+#       (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/,
+#       (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, 
+#       (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/,
+#       ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」)
+助詞-副助詞
+#
+#  particle-interjective: particles with interjective grammatical roles.
+#  e.g. (松島) や
+助詞-間投助詞
+#
+#  particle-coordinate:
+#  e.g. と, たり, だの, だり, とか, なり, や, やら
+助詞-並立助詞
+#
+#  particle-final:
+#  e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, 
+#       ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/
+助詞-終助詞
+#
+#  particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is 
+#  adverbial, conjunctive, or sentence final. For example:
+#       (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」
+#       (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」
+#           「(祈りが届いたせい) か (, 試験に合格した.)」
+#       (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」
+#  e.g. か
+助詞-副助詞／並立助詞／終助詞
+#
+#  particle-adnominalizer: The "no" that attaches to nouns and modifies 
+#  non-inflectional words.
+助詞-連体化
+#
+#  particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs 
+#  that are giongo, giseigo, or gitaigo.
+#  e.g. に, と
+助詞-副詞化
+#
+#  particle-special: A particle that does not fit into one of the above classifications. 
+#  This includes particles that are used in Tanka, Haiku, and other poetry.
+#  e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家)
+助詞-特殊
+#
+#####
+#  auxiliary-verb:
+助動詞
+#
+#####
+#  interjection: Greetings and other exclamations.
+#  e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, 
+#       いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい
+#感動詞
+#
+#####
+#  symbol: unclassified Symbols.
+記号
+#
+#  symbol-misc: A general symbol not in one of the categories below.
+#  e.g. [○◎@$〒→+]
+記号-一般
+#
+#  symbol-comma: Commas
+#  e.g. [,、]
+記号-読点
+#
+#  symbol-period: Periods and full stops.
+#  e.g. [.．。]
+記号-句点
+#
+#  symbol-space: Full-width whitespace.
+記号-空白
+#
+#  symbol-open_bracket:
+#  e.g. [({‘“『【]
+記号-括弧開
+#
+#  symbol-close_bracket:
+#  e.g. [)}’”』」】]
+記号-括弧閉
+#
+#  symbol-alphabetic:
+#記号-アルファベット
+#
+#####
+#  other: unclassified other
+#その他
+#
+#  other-interjection: Words that are hard to classify as noun-suffixes or 
+#  sentence-final particles.
+#  e.g. (だ)ァ
+その他-間投
+#
+#####
+#  filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
+#  e.g. あの, うんと, えと
+フィラー
+#
+#####
+#  non-verbal: non-verbal sound.
+非言語音
+#
+#####
+#  fragment:
+#語断片
+#
+#####
+#  unknown: unknown part of speech.
+#未知語
+#
+##### End of file
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_ar.txt b/zookeeper/solr/collection1/conf/lang/stopwords_ar.txt

new file mode 100644 (file)

index 0000000..046829d
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_ar.txt
@@ -0,0 +1,125 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# Cleaned on October 11, 2009 (not normalized, so use before normalization)
+# This means that when modifying this list, you might need to add some 
+# redundant entries, for example containing forms with both أ and ا
+من
+ومن
+منها
+منه
+في
+وفي
+فيها
+فيه
+و
+ف
+ثم
+او
+أو
+ب
+بها
+به
+ا
+أ
+اى
+اي
+أي
+أى
+لا
+ولا
+الا
+ألا
+إلا
+لكن
+ما
+وما
+كما
+فما
+عن
+مع
+اذا
+إذا
+ان
+أن
+إن
+انها
+أنها
+إنها
+انه
+أنه
+إنه
+بان
+بأن
+فان
+فأن
+وان
+وأن
+وإن
+التى
+التي
+الذى
+الذي
+الذين
+الى
+الي
+إلى
+إلي
+على
+عليها
+عليه
+اما
+أما
+إما
+ايضا
+أيضا
+كل
+وكل
+لم
+ولم
+لن
+ولن
+هى
+هي
+هو
+وهى
+وهي
+وهو
+فهى
+فهي
+فهو
+انت
+أنت
+لك
+لها
+له
+هذه
+هذا
+تلك
+ذلك
+هناك
+كانت
+كان
+يكون
+تكون
+وكانت
+وكان
+غير
+بعض
+قد
+نحو
+بين
+بينما
+منذ
+ضمن
+حيث
+الان
+الآن
+خلال
+بعد
+قبل
+حتى
+عند
+عندما
+لدى
+جميع
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_bg.txt b/zookeeper/solr/collection1/conf/lang/stopwords_bg.txt

new file mode 100644 (file)

index 0000000..1ae4ba2
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_bg.txt
@@ -0,0 +1,193 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+а
+аз
+ако
+ала
+бе
+без
+беше
+би
+бил
+била
+били
+било
+близо
+бъдат
+бъде
+бяха
+в
+вас
+ваш
+ваша
+вероятно
+вече
+взема
+ви
+вие
+винаги
+все
+всеки
+всички
+всичко
+всяка
+във
+въпреки
+върху
+г
+ги
+главно
+го
+д
+да
+дали
+до
+докато
+докога
+дори
+досега
+доста
+е
+едва
+един
+ето
+за
+зад
+заедно
+заради
+засега
+затова
+защо
+защото
+и
+из
+или
+им
+има
+имат
+иска
+й
+каза
+как
+каква
+какво
+както
+какъв
+като
+кога
+когато
+което
+които
+кой
+който
+колко
+която
+къде
+където
+към
+ли
+м
+ме
+между
+мен
+ми
+мнозина
+мога
+могат
+може
+моля
+момента
+му
+н
+на
+над
+назад
+най
+направи
+напред
+например
+нас
+не
+него
+нея
+ни
+ние
+никой
+нито
+но
+някои
+някой
+няма
+обаче
+около
+освен
+особено
+от
+отгоре
+отново
+още
+пак
+по
+повече
+повечето
+под
+поне
+поради
+после
+почти
+прави
+пред
+преди
+през
+при
+пък
+първо
+с
+са
+само
+се
+сега
+си
+скоро
+след
+сме
+според
+сред
+срещу
+сте
+съм
+със
+също
+т
+тази
+така
+такива
+такъв
+там
+твой
+те
+тези
+ти
+тн
+то
+това
+тогава
+този
+той
+толкова
+точно
+трябва
+тук
+тъй
+тя
+тях
+у
+харесва
+ч
+че
+често
+чрез
+ще
+щом
+я
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_ca.txt b/zookeeper/solr/collection1/conf/lang/stopwords_ca.txt

new file mode 100644 (file)

index 0000000..3da65de
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_ca.txt
@@ -0,0 +1,220 @@
+# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
+a
+abans
+ací
+ah
+així
+això
+al
+als
+aleshores
+algun
+alguna
+algunes
+alguns
+alhora
+allà
+allí
+allò
+altra
+altre
+altres
+amb
+ambdós
+ambdues
+apa
+aquell
+aquella
+aquelles
+aquells
+aquest
+aquesta
+aquestes
+aquests
+aquí
+baix
+cada
+cadascú
+cadascuna
+cadascunes
+cadascuns
+com
+contra
+d'un
+d'una
+d'unes
+d'uns
+dalt
+de
+del
+dels
+des
+després
+dins
+dintre
+donat
+doncs
+durant
+e
+eh
+el
+els
+em
+en
+encara
+ens
+entre
+érem
+eren
+éreu
+es
+és
+esta
+està
+estàvem
+estaven
+estàveu
+esteu
+et
+etc
+ets
+fins
+fora
+gairebé
+ha
+han
+has
+havia
+he
+hem
+heu
+hi 
+ho
+i
+igual
+iguals
+ja
+l'hi
+la
+les
+li
+li'n
+llavors
+m'he
+ma
+mal
+malgrat
+mateix
+mateixa
+mateixes
+mateixos
+me
+mentre
+més
+meu
+meus
+meva
+meves
+molt
+molta
+moltes
+molts
+mon
+mons
+n'he
+n'hi
+ne
+ni
+no
+nogensmenys
+només
+nosaltres
+nostra
+nostre
+nostres
+o
+oh
+oi
+on
+pas
+pel
+pels
+per
+però
+perquè
+poc 
+poca
+pocs
+poques
+potser
+propi
+qual
+quals
+quan
+quant 
+que
+què
+quelcom
+qui
+quin
+quina
+quines
+quins
+s'ha
+s'han
+sa
+semblant
+semblants
+ses
+seu 
+seus
+seva
+seva
+seves
+si
+sobre
+sobretot
+sóc
+solament
+sols
+son 
+són
+sons 
+sota
+sou
+t'ha
+t'han
+t'he
+ta
+tal
+també
+tampoc
+tan
+tant
+tanta
+tantes
+teu
+teus
+teva
+teves
+ton
+tons
+tot
+tota
+totes
+tots
+un
+una
+unes
+uns
+us
+va
+vaig
+vam
+van
+vas
+veu
+vosaltres
+vostra
+vostre
+vostres
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_cz.txt b/zookeeper/solr/collection1/conf/lang/stopwords_cz.txt

new file mode 100644 (file)

index 0000000..53c6097
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_cz.txt
@@ -0,0 +1,172 @@
+a
+s
+k
+o
+i
+u
+v
+z
+dnes
+cz
+tímto
+budeš
+budem
+byli
+jseš
+můj
+svým
+ta
+tomto
+tohle
+tuto
+tyto
+jej
+zda
+proč
+máte
+tato
+kam
+tohoto
+kdo
+kteří
+mi
+nám
+tom
+tomuto
+mít
+nic
+proto
+kterou
+byla
+toho
+protože
+asi
+ho
+naši
+napište
+re
+což
+tím
+takže
+svých
+její
+svými
+jste
+aj
+tu
+tedy
+teto
+bylo
+kde
+ke
+pravé
+ji
+nad
+nejsou
+či
+pod
+téma
+mezi
+přes
+ty
+pak
+vám
+ani
+když
+však
+neg
+jsem
+tento
+článku
+články
+aby
+jsme
+před
+pta
+jejich
+byl
+ještě
+až
+bez
+také
+pouze
+první
+vaše
+která
+nás
+nový
+tipy
+pokud
+může
+strana
+jeho
+své
+jiné
+zprávy
+nové
+není
+vás
+jen
+podle
+zde
+už
+být
+více
+bude
+již
+než
+který
+by
+které
+co
+nebo
+ten
+tak
+má
+při
+od
+po
+jsou
+jak
+další
+ale
+si
+se
+ve
+to
+jako
+za
+zpět
+ze
+do
+pro
+je
+na
+atd
+atp
+jakmile
+přičemž
+já
+on
+ona
+ono
+oni
+ony
+my
+vy
+jí
+ji
+mě
+mne
+jemu
+tomu
+těm
+těmu
+němu
+němuž
+jehož
+jíž
+jelikož
+jež
+jakož
+načež
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_da.txt b/zookeeper/solr/collection1/conf/lang/stopwords_da.txt

new file mode 100644 (file)

index 0000000..a3ff5fe
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_da.txt
@@ -0,0 +1,108 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+
+ | A Danish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+
+og           | and
+i            | in
+jeg          | I
+det          | that (dem. pronoun)/it (pers. pronoun)
+at           | that (in front of a sentence)/to (with infinitive)
+en           | a/an
+den          | it (pers. pronoun)/that (dem. pronoun)
+til          | to/at/for/until/against/by/of/into, more
+er           | present tense of "to be"
+som          | who, as
+på           | on/upon/in/on/at/to/after/of/with/for, on
+de           | they
+med          | with/by/in, along
+han          | he
+af           | of/by/from/off/for/in/with/on, off
+for          | at/for/to/from/by/of/ago, in front/before, because
+ikke         | not
+der          | who/which, there/those
+var          | past tense of "to be"
+mig          | me/myself
+sig          | oneself/himself/herself/itself/themselves
+men          | but
+et           | a/an/one, one (number), someone/somebody/one
+har          | present tense of "to have"
+om           | round/about/for/in/a, about/around/down, if
+vi           | we
+min          | my
+havde        | past tense of "to have"
+ham          | him
+hun          | she
+nu           | now
+over         | over/above/across/by/beyond/past/on/about, over/past
+da           | then, when/as/since
+fra          | from/off/since, off, since
+du           | you
+ud           | out
+sin          | his/her/its/one's
+dem          | them
+os           | us/ourselves
+op           | up
+man          | you/one
+hans         | his
+hvor         | where
+eller        | or
+hvad         | what
+skal         | must/shall etc.
+selv         | myself/yourself/herself/ourselves etc., even
+her          | here
+alle         | all/everyone/everybody etc.
+vil          | will (verb)
+blev         | past tense of "to stay/to remain/to get/to become"
+kunne        | could
+ind          | in
+når          | when
+være         | infinitive of "to be"
+dog          | however/yet/after all
+noget        | something
+ville        | would
+jo           | you know/you see (adv), yes
+deres        | their/theirs
+efter        | after/behind/according to/for/by/from, later/afterwards
+ned          | down
+skulle       | should
+denne        | this
+end          | than
+dette        | this
+mit          | my/mine
+også         | also
+under        | under/beneath/below/during, below/underneath
+have         | have
+dig          | you
+anden        | other
+hende        | her
+mine         | my
+alt          | everything
+meget        | much/very, plenty of
+sit          | his, her, its, one's
+sine         | his, her, its, one's
+vor          | our
+mod          | against
+disse        | these
+hvis         | if
+din          | your/yours
+nogle        | some
+hos          | by/at
+blive        | be/become
+mange        | many
+ad           | by/through
+bliver       | present tense of "to be/to become"
+hendes       | her/hers
+været        | been
+thi          | for (conj)
+jer          | you
+sådan        | such, like this/like that
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_de.txt b/zookeeper/solr/collection1/conf/lang/stopwords_de.txt

new file mode 100644 (file)

index 0000000..f770384
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_de.txt
@@ -0,0 +1,292 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+
+ | A German stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | The number of forms in this list is reduced significantly by passing it
+ | through the German stemmer.
+
+
+aber           |  but
+
+alle           |  all
+allem
+allen
+aller
+alles
+
+als            |  than, as
+also           |  so
+am             |  an + dem
+an             |  at
+
+ander          |  other
+andere
+anderem
+anderen
+anderer
+anderes
+anderm
+andern
+anderr
+anders
+
+auch           |  also
+auf            |  on
+aus            |  out of
+bei            |  by
+bin            |  am
+bis            |  until
+bist           |  art
+da             |  there
+damit          |  with it
+dann           |  then
+
+der            |  the
+den
+des
+dem
+die
+das
+
+daß            |  that
+
+derselbe       |  the same
+derselben
+denselben
+desselben
+demselben
+dieselbe
+dieselben
+dasselbe
+
+dazu           |  to that
+
+dein           |  thy
+deine
+deinem
+deinen
+deiner
+deines
+
+denn           |  because
+
+derer          |  of those
+dessen         |  of him
+
+dich           |  thee
+dir            |  to thee
+du             |  thou
+
+dies           |  this
+diese
+diesem
+diesen
+dieser
+dieses
+
+
+doch           |  (several meanings)
+dort           |  (over) there
+
+
+durch          |  through
+
+ein            |  a
+eine
+einem
+einen
+einer
+eines
+
+einig          |  some
+einige
+einigem
+einigen
+einiger
+einiges
+
+einmal         |  once
+
+er             |  he
+ihn            |  him
+ihm            |  to him
+
+es             |  it
+etwas          |  something
+
+euer           |  your
+eure
+eurem
+euren
+eurer
+eures
+
+für            |  for
+gegen          |  towards
+gewesen        |  p.p. of sein
+hab            |  have
+habe           |  have
+haben          |  have
+hat            |  has
+hatte          |  had
+hatten         |  had
+hier           |  here
+hin            |  there
+hinter         |  behind
+
+ich            |  I
+mich           |  me
+mir            |  to me
+
+
+ihr            |  you, to her
+ihre
+ihrem
+ihren
+ihrer
+ihres
+euch           |  to you
+
+im             |  in + dem
+in             |  in
+indem          |  while
+ins            |  in + das
+ist            |  is
+
+jede           |  each, every
+jedem
+jeden
+jeder
+jedes
+
+jene           |  that
+jenem
+jenen
+jener
+jenes
+
+jetzt          |  now
+kann           |  can
+
+kein           |  no
+keine
+keinem
+keinen
+keiner
+keines
+
+können         |  can
+könnte         |  could
+machen         |  do
+man            |  one
+
+manche         |  some, many a
+manchem
+manchen
+mancher
+manches
+
+mein           |  my
+meine
+meinem
+meinen
+meiner
+meines
+
+mit            |  with
+muss           |  must
+musste         |  had to
+nach           |  to(wards)
+nicht          |  not
+nichts         |  nothing
+noch           |  still, yet
+nun            |  now
+nur            |  only
+ob             |  whether
+oder           |  or
+ohne           |  without
+sehr           |  very
+
+sein           |  his
+seine
+seinem
+seinen
+seiner
+seines
+
+selbst         |  self
+sich           |  herself
+
+sie            |  they, she
+ihnen          |  to them
+
+sind           |  are
+so             |  so
+
+solche         |  such
+solchem
+solchen
+solcher
+solches
+
+soll           |  shall
+sollte         |  should
+sondern        |  but
+sonst          |  else
+über           |  over
+um             |  about, around
+und            |  and
+
+uns            |  us
+unse
+unsem
+unsen
+unser
+unses
+
+unter          |  under
+viel           |  much
+vom            |  von + dem
+von            |  from
+vor            |  before
+während        |  while
+war            |  was
+waren          |  were
+warst          |  wast
+was            |  what
+weg            |  away, off
+weil           |  because
+weiter         |  further
+
+welche         |  which
+welchem
+welchen
+welcher
+welches
+
+wenn           |  when
+werde          |  will
+werden         |  will
+wie            |  how
+wieder         |  again
+will           |  want
+wir            |  we
+wird           |  will
+wirst          |  willst
+wo             |  where
+wollen         |  want
+wollte         |  wanted
+würde          |  would
+würden         |  would
+zu             |  to
+zum            |  zu + dem
+zur            |  zu + der
+zwar           |  indeed
+zwischen       |  between
+
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_el.txt b/zookeeper/solr/collection1/conf/lang/stopwords_el.txt

new file mode 100644 (file)

index 0000000..232681f
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_el.txt
@@ -0,0 +1,78 @@
+# Lucene Greek Stopwords list
+# Note: by default this file is used after GreekLowerCaseFilter,
+# so when modifying this file use 'σ' instead of 'ς' 
+ο
+η
+το
+οι
+τα
+του
+τησ
+των
+τον
+την
+και 
+κι
+κ
+ειμαι
+εισαι
+ειναι
+ειμαστε
+ειστε
+στο
+στον
+στη
+στην
+μα
+αλλα
+απο
+για
+προσ
+με
+σε
+ωσ
+παρα
+αντι
+κατα
+μετα
+θα
+να
+δε
+δεν
+μη
+μην
+επι
+ενω
+εαν
+αν
+τοτε
+που
+πωσ
+ποιοσ
+ποια
+ποιο
+ποιοι
+ποιεσ
+ποιων
+ποιουσ
+αυτοσ
+αυτη
+αυτο
+αυτοι
+αυτων
+αυτουσ
+αυτεσ
+αυτα
+εκεινοσ
+εκεινη
+εκεινο
+εκεινοι
+εκεινεσ
+εκεινα
+εκεινων
+εκεινουσ
+οπωσ
+ομωσ
+ισωσ
+οσο
+οτι
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_en.txt b/zookeeper/solr/collection1/conf/lang/stopwords_en.txt

new file mode 100644 (file)

index 0000000..2c164c0
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_en.txt
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# a couple of test stopwords to test that the words are really being
+# configured from this file:
+stopworda
+stopwordb
+
+# Standard english stop words taken from Lucene's StopAnalyzer
+a
+an
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+such
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_es.txt b/zookeeper/solr/collection1/conf/lang/stopwords_es.txt

new file mode 100644 (file)

index 0000000..2db1476
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_es.txt
@@ -0,0 +1,354 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+
+ | A Spanish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de             |  from, of
+la             |  the, her
+que            |  who, that
+el             |  the
+en             |  in
+y              |  and
+a              |  to
+los            |  the, them
+del            |  de + el
+se             |  himself, from him etc
+las            |  the, them
+por            |  for, by, etc
+un             |  a
+para           |  for
+con            |  with
+no             |  no
+una            |  a
+su             |  his, her
+al             |  a + el
+  | es         from SER
+lo             |  him
+como           |  how
+más            |  more
+pero           |  but
+sus            |  su plural
+le             |  to him, her
+ya             |  already
+o              |  or
+  | fue        from SER
+este           |  this
+  | ha         from HABER
+sí             |  himself etc
+porque         |  because
+esta           |  this
+  | son        from SER
+entre          |  between
+  | está     from ESTAR
+cuando         |  when
+muy            |  very
+sin            |  without
+sobre          |  on
+  | ser        from SER
+  | tiene      from TENER
+también        |  also
+me             |  me
+hasta          |  until
+hay            |  there is/are
+donde          |  where
+  | han        from HABER
+quien          |  whom, that
+  | están      from ESTAR
+  | estado     from ESTAR
+desde          |  from
+todo           |  all
+nos            |  us
+durante        |  during
+  | estados    from ESTAR
+todos          |  all
+uno            |  a
+les            |  to them
+ni             |  nor
+contra         |  against
+otros          |  other
+  | fueron     from SER
+ese            |  that
+eso            |  that
+  | había      from HABER
+ante           |  before
+ellos          |  they
+e              |  and (variant of y)
+esto           |  this
+mí             |  me
+antes          |  before
+algunos        |  some
+qué            |  what?
+unos           |  a
+yo             |  I
+otro           |  other
+otras          |  other
+otra           |  other
+él             |  he
+tanto          |  so much, many
+esa            |  that
+estos          |  these
+mucho          |  much, many
+quienes        |  who
+nada           |  nothing
+muchos         |  many
+cual           |  who
+  | sea        from SER
+poco           |  few
+ella           |  she
+estar          |  to be
+  | haber      from HABER
+estas          |  these
+  | estaba     from ESTAR
+  | estamos    from ESTAR
+algunas        |  some
+algo           |  something
+nosotros       |  we
+
+      | other forms
+
+mi             |  my
+mis            |  mi plural
+tú             |  thou
+te             |  thee
+ti             |  thee
+tu             |  thy
+tus            |  tu plural
+ellas          |  they
+nosotras       |  we
+vosotros       |  you
+vosotras       |  you
+os             |  you
+mío            |  mine
+mía            |
+míos           |
+mías           |
+tuyo           |  thine
+tuya           |
+tuyos          |
+tuyas          |
+suyo           |  his, hers, theirs
+suya           |
+suyos          |
+suyas          |
+nuestro        |  ours
+nuestra        |
+nuestros       |
+nuestras       |
+vuestro        |  yours
+vuestra        |
+vuestros       |
+vuestras       |
+esos           |  those
+esas           |  those
+
+               | forms of estar, to be (not including the infinitive):
+estoy
+estás
+está
+estamos
+estáis
+están
+esté
+estés
+estemos
+estéis
+estén
+estaré
+estarás
+estará
+estaremos
+estaréis
+estarán
+estaría
+estarías
+estaríamos
+estaríais
+estarían
+estaba
+estabas
+estábamos
+estabais
+estaban
+estuve
+estuviste
+estuvo
+estuvimos
+estuvisteis
+estuvieron
+estuviera
+estuvieras
+estuviéramos
+estuvierais
+estuvieran
+estuviese
+estuvieses
+estuviésemos
+estuvieseis
+estuviesen
+estando
+estado
+estada
+estados
+estadas
+estad
+
+               | forms of haber, to have (not including the infinitive):
+he
+has
+ha
+hemos
+habéis
+han
+haya
+hayas
+hayamos
+hayáis
+hayan
+habré
+habrás
+habrá
+habremos
+habréis
+habrán
+habría
+habrías
+habríamos
+habríais
+habrían
+había
+habías
+habíamos
+habíais
+habían
+hube
+hubiste
+hubo
+hubimos
+hubisteis
+hubieron
+hubiera
+hubieras
+hubiéramos
+hubierais
+hubieran
+hubiese
+hubieses
+hubiésemos
+hubieseis
+hubiesen
+habiendo
+habido
+habida
+habidos
+habidas
+
+               | forms of ser, to be (not including the infinitive):
+soy
+eres
+es
+somos
+sois
+son
+sea
+seas
+seamos
+seáis
+sean
+seré
+serás
+será
+seremos
+seréis
+serán
+sería
+serías
+seríamos
+seríais
+serían
+era
+eras
+éramos
+erais
+eran
+fui
+fuiste
+fue
+fuimos
+fuisteis
+fueron
+fuera
+fueras
+fuéramos
+fuerais
+fueran
+fuese
+fueses
+fuésemos
+fueseis
+fuesen
+siendo
+sido
+  |  sed also means 'thirst'
+
+               | forms of tener, to have (not including the infinitive):
+tengo
+tienes
+tiene
+tenemos
+tenéis
+tienen
+tenga
+tengas
+tengamos
+tengáis
+tengan
+tendré
+tendrás
+tendrá
+tendremos
+tendréis
+tendrán
+tendría
+tendrías
+tendríamos
+tendríais
+tendrían
+tenía
+tenías
+teníamos
+teníais
+tenían
+tuve
+tuviste
+tuvo
+tuvimos
+tuvisteis
+tuvieron
+tuviera
+tuvieras
+tuviéramos
+tuvierais
+tuvieran
+tuviese
+tuvieses
+tuviésemos
+tuvieseis
+tuviesen
+teniendo
+tenido
+tenida
+tenidos
+tenidas
+tened
+
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_eu.txt b/zookeeper/solr/collection1/conf/lang/stopwords_eu.txt

new file mode 100644 (file)

index 0000000..25f1db9
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_eu.txt
@@ -0,0 +1,99 @@
+# example set of basque stopwords
+al
+anitz
+arabera
+asko
+baina
+bat
+batean
+batek
+bati
+batzuei
+batzuek
+batzuetan
+batzuk
+bera
+beraiek
+berau
+berauek
+bere
+berori
+beroriek
+beste
+bezala
+da
+dago
+dira
+ditu
+du
+dute
+edo
+egin
+ere
+eta
+eurak
+ez
+gainera
+gu
+gutxi
+guzti
+haiei
+haiek
+haietan
+hainbeste
+hala
+han
+handik
+hango
+hara
+hari
+hark
+hartan
+hau
+hauei
+hauek
+hauetan
+hemen
+hemendik
+hemengo
+hi
+hona
+honek
+honela
+honetan
+honi
+hor
+hori
+horiei
+horiek
+horietan
+horko
+horra
+horrek
+horrela
+horretan
+horri
+hortik
+hura
+izan
+ni
+noiz
+nola
+non
+nondik
+nongo
+nor
+nora
+ze
+zein
+zen
+zenbait
+zenbat
+zer
+zergatik
+ziren
+zituen
+zu
+zuek
+zuen
+zuten
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_fa.txt b/zookeeper/solr/collection1/conf/lang/stopwords_fa.txt

new file mode 100644 (file)

index 0000000..723641c
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_fa.txt
@@ -0,0 +1,313 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# Note: by default this file is used after normalization, so when adding entries
+# to this file, use the arabic 'ي' instead of 'ی'
+انان
+نداشته
+سراسر
+خياه
+ايشان
+وي
+تاكنون
+بيشتري
+دوم
+پس
+ناشي
+وگو
+يا
+داشتند
+سپس
+هنگام
+هرگز
+پنج
+نشان
+امسال
+ديگر
+گروهي
+شدند
+چطور
+ده
+و
+دو
+نخستين
+ولي
+چرا
+چه
+وسط
+ه
+كدام
+قابل
+يك
+رفت
+هفت
+همچنين
+در
+هزار
+بله
+بلي
+شايد
+اما
+شناسي
+گرفته
+دهد
+داشته
+دانست
+داشتن
+خواهيم
+ميليارد
+وقتيكه
+امد
+خواهد
+جز
+اورده
+شده
+بلكه
+خدمات
+شدن
+برخي
+نبود
+بسياري
+جلوگيري
+حق
+كردند
+نوعي
+بعري
+نكرده
+نظير
+نبايد
+بوده
+بودن
+داد
+اورد
+هست
+جايي
+شود
+دنبال
+داده
+بايد
+سابق
+هيچ
+همان
+انجا
+كمتر
+كجاست
+گردد
+كسي
+تر
+مردم
+تان
+دادن
+بودند
+سري
+جدا
+ندارند
+مگر
+يكديگر
+دارد
+دهند
+بنابراين
+هنگامي
+سمت
+جا
+انچه
+خود
+دادند
+زياد
+دارند
+اثر
+بدون
+بهترين
+بيشتر
+البته
+به
+براساس
+بيرون
+كرد
+بعضي
+گرفت
+توي
+اي
+ميليون
+او
+جريان
+تول
+بر
+مانند
+برابر
+باشيم
+مدتي
+گويند
+اكنون
+تا
+تنها
+جديد
+چند
+بي
+نشده
+كردن
+كردم
+گويد
+كرده
+كنيم
+نمي
+نزد
+روي
+قصد
+فقط
+بالاي
+ديگران
+اين
+ديروز
+توسط
+سوم
+ايم
+دانند
+سوي
+استفاده
+شما
+كنار
+داريم
+ساخته
+طور
+امده
+رفته
+نخست
+بيست
+نزديك
+طي
+كنيد
+از
+انها
+تمامي
+داشت
+يكي
+طريق
+اش
+چيست
+روب
+نمايد
+گفت
+چندين
+چيزي
+تواند
+ام
+ايا
+با
+ان
+ايد
+ترين
+اينكه
+ديگري
+راه
+هايي
+بروز
+همچنان
+پاعين
+كس
+حدود
+مختلف
+مقابل
+چيز
+گيرد
+ندارد
+ضد
+همچون
+سازي
+شان
+مورد
+باره
+مرسي
+خويش
+برخوردار
+چون
+خارج
+شش
+هنوز
+تحت
+ضمن
+هستيم
+گفته
+فكر
+بسيار
+پيش
+براي
+روزهاي
+انكه
+نخواهد
+بالا
+كل
+وقتي
+كي
+چنين
+كه
+گيري
+نيست
+است
+كجا
+كند
+نيز
+يابد
+بندي
+حتي
+توانند
+عقب
+خواست
+كنند
+بين
+تمام
+همه
+ما
+باشند
+مثل
+شد
+اري
+باشد
+اره
+طبق
+بعد
+اگر
+صورت
+غير
+جاي
+بيش
+ريزي
+اند
+زيرا
+چگونه
+بار
+لطفا
+مي
+درباره
+من
+ديده
+همين
+گذاري
+برداري
+علت
+گذاشته
+هم
+فوق
+نه
+ها
+شوند
+اباد
+همواره
+هر
+اول
+خواهند
+چهار
+نام
+امروز
+مان
+هاي
+قبل
+كنم
+سعي
+تازه
+را
+هستند
+زير
+جلوي
+عنوان
+بود
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_fi.txt b/zookeeper/solr/collection1/conf/lang/stopwords_fi.txt

new file mode 100644 (file)

index 0000000..addad79
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_fi.txt
@@ -0,0 +1,95 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+ 
+| forms of BE
+
+olla
+olen
+olet
+on
+olemme
+olette
+ovat
+ole        | negative form
+
+oli
+olisi
+olisit
+olisin
+olisimme
+olisitte
+olisivat
+olit
+olin
+olimme
+olitte
+olivat
+ollut
+olleet
+
+en         | negation
+et
+ei
+emme
+ette
+eivät
+
+|Nom   Gen    Acc    Part   Iness   Elat    Illat  Adess   Ablat   Allat   Ess    Trans
+minä   minun  minut  minua  minussa minusta minuun minulla minulta minulle               | I
+sinä   sinun  sinut  sinua  sinussa sinusta sinuun sinulla sinulta sinulle               | you
+hän    hänen  hänet  häntä  hänessä hänestä häneen hänellä häneltä hänelle               | he she
+me     meidän meidät meitä  meissä  meistä  meihin meillä  meiltä  meille                | we
+te     teidän teidät teitä  teissä  teistä  teihin teillä  teiltä  teille                | you
+he     heidän heidät heitä  heissä  heistä  heihin heillä  heiltä  heille                | they
+
+tämä   tämän         tätä   tässä   tästä   tähän  tallä   tältä   tälle   tänä   täksi  | this
+tuo    tuon          tuotä  tuossa  tuosta  tuohon tuolla  tuolta  tuolle  tuona  tuoksi | that
+se     sen           sitä   siinä   siitä   siihen sillä   siltä   sille   sinä   siksi  | it
+nämä   näiden        näitä  näissä  näistä  näihin näillä  näiltä  näille  näinä  näiksi | these
+nuo    noiden        noita  noissa  noista  noihin noilla  noilta  noille  noina  noiksi | those
+ne     niiden        niitä  niissä  niistä  niihin niillä  niiltä  niille  niinä  niiksi | they
+
+kuka   kenen kenet   ketä   kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
+ketkä  keiden ketkä  keitä  keissä  keistä  keihin keillä  keiltä  keille  keinä  keiksi | (pl)
+mikä   minkä minkä   mitä   missä   mistä   mihin  millä   miltä   mille   minä   miksi  | which what
+mitkä                                                                                    | (pl)
+
+joka   jonka         jota   jossa   josta   johon  jolla   jolta   jolle   jona   joksi  | who which
+jotka  joiden        joita  joissa  joista  joihin joilla  joilta  joille  joina  joiksi | (pl)
+
+| conjunctions
+
+että   | that
+ja     | and
+jos    | if
+koska  | because
+kuin   | than
+mutta  | but
+niin   | so
+sekä   | and
+sillä  | for
+tai    | or
+vaan   | but
+vai    | or
+vaikka | although
+
+
+| prepositions
+
+kanssa  | with
+mukaan  | according to
+noin    | about
+poikki  | across
+yli     | over, across
+
+| other
+
+kun    | when
+niin   | so
+nyt    | now
+itse   | self
+
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_fr.txt b/zookeeper/solr/collection1/conf/lang/stopwords_fr.txt

new file mode 100644 (file)

index 0000000..20d12cb
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_fr.txt
@@ -0,0 +1,184 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+
+ | A French stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+au             |  a + le
+aux            |  a + les
+avec           |  with
+ce             |  this
+ces            |  these
+dans           |  in
+de             |  of
+des            |  de + les
+du             |  de + le
+elle           |  she
+en             |  `of them' etc
+et             |  and
+eux            |  them
+il             |  he
+je             |  I
+la             |  the
+le             |  the
+leur           |  their
+lui            |  him
+ma             |  my (fem)
+mais           |  but
+me             |  me
+même           |  same; as in moi-même (myself) etc
+mes            |  my (pl)
+moi            |  me
+mon            |  my (masc)
+ne             |  not
+nos            |  our (pl)
+notre          |  our
+nous           |  we
+on             |  one
+ou             |  or
+par            |  by
+pas            |  not
+pour           |  for
+qu             |  que before vowel
+que            |  that
+qui            |  who
+sa             |  his, her (fem)
+se             |  oneself
+ses            |  his (pl)
+son            |  his, her (masc)
+sur            |  on
+ta             |  thy (fem)
+te             |  thee
+tes            |  thy (pl)
+toi            |  thee
+ton            |  thy (masc)
+tu             |  thou
+un             |  a
+une            |  a
+vos            |  your (pl)
+votre          |  your
+vous           |  you
+
+               |  single letter forms
+
+c              |  c'
+d              |  d'
+j              |  j'
+l              |  l'
+à              |  to, at
+m              |  m'
+n              |  n'
+s              |  s'
+t              |  t'
+y              |  there
+
+               | forms of être (not including the infinitive):
+été
+étée
+étées
+étés
+étant
+suis
+es
+est
+sommes
+êtes
+sont
+serai
+seras
+sera
+serons
+serez
+seront
+serais
+serait
+serions
+seriez
+seraient
+étais
+était
+étions
+étiez
+étaient
+fus
+fut
+fûmes
+fûtes
+furent
+sois
+soit
+soyons
+soyez
+soient
+fusse
+fusses
+fût
+fussions
+fussiez
+fussent
+
+               | forms of avoir (not including the infinitive):
+ayant
+eu
+eue
+eues
+eus
+ai
+as
+avons
+avez
+ont
+aurai
+auras
+aura
+aurons
+aurez
+auront
+aurais
+aurait
+aurions
+auriez
+auraient
+avais
+avait
+avions
+aviez
+avaient
+eut
+eûmes
+eûtes
+eurent
+aie
+aies
+ait
+ayons
+ayez
+aient
+eusse
+eusses
+eût
+eussions
+eussiez
+eussent
+
+               | Later additions (from Jean-Christophe Deschamps)
+ceci           |  this
+cela           |  that
+celà           |  that
+cet            |  this
+cette          |  this
+ici            |  here
+ils            |  they
+les            |  the (pl)
+leurs          |  their (pl)
+quel           |  which
+quels          |  which
+quelle         |  which
+quelles        |  which
+sans           |  without
+soi            |  oneself
+
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_ga.txt b/zookeeper/solr/collection1/conf/lang/stopwords_ga.txt

new file mode 100644 (file)

index 0000000..9ff88d7
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_ga.txt
@@ -0,0 +1,110 @@
+
+a
+ach
+ag
+agus
+an
+aon
+ar
+arna
+as
+b'
+ba
+beirt
+bhúr
+caoga
+ceathair
+ceathrar
+chomh
+chtó
+chuig
+chun
+cois
+céad
+cúig
+cúigear
+d'
+daichead
+dar
+de
+deich
+deichniúr
+den
+dhá
+do
+don
+dtí
+dá
+dár
+dó
+faoi
+faoin
+faoina
+faoinár
+fara
+fiche
+gach
+gan
+go
+gur
+haon
+hocht
+i
+iad
+idir
+in
+ina
+ins
+inár
+is
+le
+leis
+lena
+lenár
+m'
+mar
+mo
+mé
+na
+nach
+naoi
+naonúr
+ná
+ní
+níor
+nó
+nócha
+ocht
+ochtar
+os
+roimh
+sa
+seacht
+seachtar
+seachtó
+seasca
+seisear
+siad
+sibh
+sinn
+sna
+sé
+sí
+tar
+thar
+thú
+triúr
+trí
+trína
+trínár
+tríocha
+tú
+um
+ár
+é
+éis
+í
+ó
+ón
+óna
+ónár
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_gl.txt b/zookeeper/solr/collection1/conf/lang/stopwords_gl.txt

new file mode 100644 (file)

index 0000000..d8760b1
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_gl.txt
@@ -0,0 +1,161 @@
+# Galician stopwords
+a
+aínda
+alí
+aquel
+aquela
+aquelas
+aqueles
+aquilo
+aquí
+ao
+aos
+as
+así
+á
+ben
+cando
+che
+co
+coa
+comigo
+con
+connosco
+contigo
+convosco
+coas
+cos
+cun
+cuns
+cunha
+cunhas
+da
+dalgunha
+dalgunhas
+dalgún
+dalgúns
+das
+de
+del
+dela
+delas
+deles
+desde
+deste
+do
+dos
+dun
+duns
+dunha
+dunhas
+e
+el
+ela
+elas
+eles
+en
+era
+eran
+esa
+esas
+ese
+eses
+esta
+estar
+estaba
+está
+están
+este
+estes
+estiven
+estou
+eu
+é
+facer
+foi
+foron
+fun
+había
+hai
+iso
+isto
+la
+las
+lle
+lles
+lo
+los
+mais
+me
+meu
+meus
+min
+miña
+miñas
+moi
+na
+nas
+neste
+nin
+no
+non
+nos
+nosa
+nosas
+noso
+nosos
+nós
+nun
+nunha
+nuns
+nunhas
+o
+os
+ou
+ó
+ós
+para
+pero
+pode
+pois
+pola
+polas
+polo
+polos
+por
+que
+se
+senón
+ser
+seu
+seus
+sexa
+sido
+sobre
+súa
+súas
+tamén
+tan
+te
+ten
+teñen
+teño
+ter
+teu
+teus
+ti
+tido
+tiña
+tiven
+túa
+túas
+un
+unha
+unhas
+uns
+vos
+vosa
+vosas
+voso
+vosos
+vós
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_hi.txt b/zookeeper/solr/collection1/conf/lang/stopwords_hi.txt

new file mode 100644 (file)

index 0000000..86286bb
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_hi.txt
@@ -0,0 +1,235 @@
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# Note: by default this file also contains forms normalized by HindiNormalizer 
+# for spelling variation (see section below), such that it can be used whether or 
+# not you enable that feature. When adding additional entries to this list,
+# please add the normalized form as well. 
+अंदर
+अत
+अपना
+अपनी
+अपने
+अभी
+आदि
+आप
+इत्यादि
+इन 
+इनका
+इन्हीं
+इन्हें
+इन्हों
+इस
+इसका
+इसकी
+इसके
+इसमें
+इसी
+इसे
+उन
+उनका
+उनकी
+उनके
+उनको
+उन्हीं
+उन्हें
+उन्हों
+उस
+उसके
+उसी
+उसे
+एक
+एवं
+एस
+ऐसे
+और
+कई
+कर
+करता
+करते
+करना
+करने
+करें
+कहते
+कहा
+का
+काफ़ी
+कि
+कितना
+किन्हें
+किन्हों
+किया
+किर
+किस
+किसी
+किसे
+की
+कुछ
+कुल
+के
+को
+कोई
+कौन
+कौनसा
+गया
+घर
+जब
+जहाँ
+जा
+जितना
+जिन
+जिन्हें
+जिन्हों
+जिस
+जिसे
+जीधर
+जैसा
+जैसे
+जो
+तक
+तब
+तरह
+तिन
+तिन्हें
+तिन्हों
+तिस
+तिसे
+तो
+था
+थी
+थे
+दबारा
+दिया
+दुसरा
+दूसरे
+दो
+द्वारा
+न
+नहीं
+ना
+निहायत
+नीचे
+ने
+पर
+पर  
+पहले
+पूरा
+पे
+फिर
+बनी
+बही
+बहुत
+बाद
+बाला
+बिलकुल
+भी
+भीतर
+मगर
+मानो
+मे
+में
+यदि
+यह
+यहाँ
+यही
+या
+यिह 
+ये
+रखें
+रहा
+रहे
+ऱ्वासा
+लिए
+लिये
+लेकिन
+व
+वर्ग
+वह
+वह 
+वहाँ
+वहीं
+वाले
+वुह 
+वे
+वग़ैरह
+संग
+सकता
+सकते
+सबसे
+सभी
+साथ
+साबुत
+साभ
+सारा
+से
+सो
+ही
+हुआ
+हुई
+हुए
+है
+हैं
+हो
+होता
+होती
+होते
+होना
+होने
+# additional normalized forms of the above
+अपनि
+जेसे
+होति
+सभि
+तिंहों
+इंहों
+दवारा
+इसि
+किंहें
+थि
+उंहों
+ओर
+जिंहें
+वहिं
+अभि
+बनि
+हि
+उंहिं
+उंहें
+हें
+वगेरह
+एसे
+रवासा
+कोन
+निचे
+काफि
+उसि
+पुरा
+भितर
+हे
+बहि
+वहां
+कोइ
+यहां
+जिंहों
+तिंहें
+किसि
+कइ
+यहि
+इंहिं
+जिधर
+इंहें
+अदि
+इतयादि
+हुइ
+कोनसा
+इसकि
+दुसरे
+जहां
+अप
+किंहों
+उनकि
+भि
+वरग
+हुअ
+जेसा
+नहिं
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_hu.txt b/zookeeper/solr/collection1/conf/lang/stopwords_hu.txt

new file mode 100644 (file)

index 0000000..1a96f1d
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_hu.txt
@@ -0,0 +1,209 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+ 
+| Hungarian stop word list
+| prepared by Anna Tordai
+
+a
+ahogy
+ahol
+aki
+akik
+akkor
+alatt
+által
+általában
+amely
+amelyek
+amelyekben
+amelyeket
+amelyet
+amelynek
+ami
+amit
+amolyan
+amíg
+amikor
+át
+abban
+ahhoz
+annak
+arra
+arról
+az
+azok
+azon
+azt
+azzal
+azért
+aztán
+azután
+azonban
+bár
+be
+belül
+benne
+cikk
+cikkek
+cikkeket
+csak
+de
+e
+eddig
+egész
+egy
+egyes
+egyetlen
+egyéb
+egyik
+egyre
+ekkor
+el
+elég
+ellen
+elő
+először
+előtt
+első
+én
+éppen
+ebben
+ehhez
+emilyen
+ennek
+erre
+ez
+ezt
+ezek
+ezen
+ezzel
+ezért
+és
+fel
+felé
+hanem
+hiszen
+hogy
+hogyan
+igen
+így
+illetve
+ill.
+ill
+ilyen
+ilyenkor
+ison
+ismét
+itt
+jó
+jól
+jobban
+kell
+kellett
+keresztül
+keressünk
+ki
+kívül
+között
+közül
+legalább
+lehet
+lehetett
+legyen
+lenne
+lenni
+lesz
+lett
+maga
+magát
+majd
+majd
+már
+más
+másik
+meg
+még
+mellett
+mert
+mely
+melyek
+mi
+mit
+míg
+miért
+milyen
+mikor
+minden
+mindent
+mindenki
+mindig
+mint
+mintha
+mivel
+most
+nagy
+nagyobb
+nagyon
+ne
+néha
+nekem
+neki
+nem
+néhány
+nélkül
+nincs
+olyan
+ott
+össze
+ő
+ők
+őket
+pedig
+persze
+rá
+s
+saját
+sem
+semmi
+sok
+sokat
+sokkal
+számára
+szemben
+szerint
+szinte
+talán
+tehát
+teljes
+tovább
+továbbá
+több
+úgy
+ugyanis
+új
+újabb
+újra
+után
+utána
+utolsó
+vagy
+vagyis
+valaki
+valami
+valamint
+való
+vagyok
+van
+vannak
+volt
+voltam
+voltak
+voltunk
+vissza
+vele
+viszont
+volna
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_hy.txt b/zookeeper/solr/collection1/conf/lang/stopwords_hy.txt

new file mode 100644 (file)

index 0000000..60c1c50
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_hy.txt
@@ -0,0 +1,46 @@
+# example set of Armenian stopwords.
+այդ
+այլ
+այն
+այս
+դու
+դուք
+եմ
+են
+ենք
+ես
+եք
+է
+էի
+էին
+էինք
+էիր
+էիք
+էր
+ըստ
+թ
+ի
+ին
+իսկ
+իր
+կամ
+համար
+հետ
+հետո
+մենք
+մեջ
+մի
+ն
+նա
+նաև
+նրա
+նրանք
+որ
+որը
+որոնք
+որպես
+ու
+ում
+պիտի
+վրա
+և
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_id.txt b/zookeeper/solr/collection1/conf/lang/stopwords_id.txt

new file mode 100644 (file)

index 0000000..4617f83
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_id.txt
@@ -0,0 +1,359 @@
+# from appendix D of: A Study of Stemming Effects on Information
+# Retrieval in Bahasa Indonesia
+ada
+adanya
+adalah
+adapun
+agak
+agaknya
+agar
+akan
+akankah
+akhirnya
+aku
+akulah
+amat
+amatlah
+anda
+andalah
+antar
+diantaranya
+antara
+antaranya
+diantara
+apa
+apaan
+mengapa
+apabila
+apakah
+apalagi
+apatah
+atau
+ataukah
+ataupun
+bagai
+bagaikan
+sebagai
+sebagainya
+bagaimana
+bagaimanapun
+sebagaimana
+bagaimanakah
+bagi
+bahkan
+bahwa
+bahwasanya
+sebaliknya
+banyak
+sebanyak
+beberapa
+seberapa
+begini
+beginian
+beginikah
+beginilah
+sebegini
+begitu
+begitukah
+begitulah
+begitupun
+sebegitu
+belum
+belumlah
+sebelum
+sebelumnya
+sebenarnya
+berapa
+berapakah
+berapalah
+berapapun
+betulkah
+sebetulnya
+biasa
+biasanya
+bila
+bilakah
+bisa
+bisakah
+sebisanya
+boleh
+bolehkah
+bolehlah
+buat
+bukan
+bukankah
+bukanlah
+bukannya
+cuma
+percuma
+dahulu
+dalam
+dan
+dapat
+dari
+daripada
+dekat
+demi
+demikian
+demikianlah
+sedemikian
+dengan
+depan
+di
+dia
+dialah
+dini
+diri
+dirinya
+terdiri
+dong
+dulu
+enggak
+enggaknya
+entah
+entahlah
+terhadap
+terhadapnya
+hal
+hampir
+hanya
+hanyalah
+harus
+haruslah
+harusnya
+seharusnya
+hendak
+hendaklah
+hendaknya
+hingga
+sehingga
+ia
+ialah
+ibarat
+ingin
+inginkah
+inginkan
+ini
+inikah
+inilah
+itu
+itukah
+itulah
+jangan
+jangankan
+janganlah
+jika
+jikalau
+juga
+justru
+kala
+kalau
+kalaulah
+kalaupun
+kalian
+kami
+kamilah
+kamu
+kamulah
+kan
+kapan
+kapankah
+kapanpun
+dikarenakan
+karena
+karenanya
+ke
+kecil
+kemudian
+kenapa
+kepada
+kepadanya
+ketika
+seketika
+khususnya
+kini
+kinilah
+kiranya
+sekiranya
+kita
+kitalah
+kok
+lagi
+lagian
+selagi
+lah
+lain
+lainnya
+melainkan
+selaku
+lalu
+melalui
+terlalu
+lama
+lamanya
+selama
+selama
+selamanya
+lebih
+terlebih
+bermacam
+macam
+semacam
+maka
+makanya
+makin
+malah
+malahan
+mampu
+mampukah
+mana
+manakala
+manalagi
+masih
+masihkah
+semasih
+masing
+mau
+maupun
+semaunya
+memang
+mereka
+merekalah
+meski
+meskipun
+semula
+mungkin
+mungkinkah
+nah
+namun
+nanti
+nantinya
+nyaris
+oleh
+olehnya
+seorang
+seseorang
+pada
+padanya
+padahal
+paling
+sepanjang
+pantas
+sepantasnya
+sepantasnyalah
+para
+pasti
+pastilah
+per
+pernah
+pula
+pun
+merupakan
+rupanya
+serupa
+saat
+saatnya
+sesaat
+saja
+sajalah
+saling
+bersama
+sama
+sesama
+sambil
+sampai
+sana
+sangat
+sangatlah
+saya
+sayalah
+se
+sebab
+sebabnya
+sebuah
+tersebut
+tersebutlah
+sedang
+sedangkan
+sedikit
+sedikitnya
+segala
+segalanya
+segera
+sesegera
+sejak
+sejenak
+sekali
+sekalian
+sekalipun
+sesekali
+sekaligus
+sekarang
+sekarang
+sekitar
+sekitarnya
+sela
+selain
+selalu
+seluruh
+seluruhnya
+semakin
+sementara
+sempat
+semua
+semuanya
+sendiri
+sendirinya
+seolah
+seperti
+sepertinya
+sering
+seringnya
+serta
+siapa
+siapakah
+siapapun
+disini
+disinilah
+sini
+sinilah
+sesuatu
+sesuatunya
+suatu
+sesudah
+sesudahnya
+sudah
+sudahkah
+sudahlah
+supaya
+tadi
+tadinya
+tak
+tanpa
+setelah
+telah
+tentang
+tentu
+tentulah
+tentunya
+tertentu
+seterusnya
+tapi
+tetapi
+setiap
+tiap
+setidaknya
+tidak
+tidakkah
+tidaklah
+toh
+waduh
+wah
+wahai
+sewaktu
+walau
+walaupun
+wong
+yaitu
+yakni
+yang
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_it.txt b/zookeeper/solr/collection1/conf/lang/stopwords_it.txt

new file mode 100644 (file)

index 0000000..4cb5b08
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_it.txt
@@ -0,0 +1,301 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+
+ | An Italian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ad             |  a (to) before vowel
+al             |  a + il
+allo           |  a + lo
+ai             |  a + i
+agli           |  a + gli
+all            |  a + l'
+agl            |  a + gl'
+alla           |  a + la
+alle           |  a + le
+con            |  with
+col            |  con + il
+coi            |  con + i (forms collo, cogli etc are now very rare)
+da             |  from
+dal            |  da + il
+dallo          |  da + lo
+dai            |  da + i
+dagli          |  da + gli
+dall           |  da + l'
+dagl           |  da + gl'
+dalla          |  da + la
+dalle          |  da + le
+di             |  of
+del            |  di + il
+dello          |  di + lo
+dei            |  di + i
+degli          |  di + gli
+dell           |  di + l'
+degl           |  di + gl'
+della          |  di + la
+delle          |  di + le
+in             |  in
+nel            |  in + il
+nello          |  in + lo
+nei            |  in + i
+negli          |  in + gli
+nell           |  in + l'
+negl           |  in + gl'
+nella          |  in + la
+nelle          |  in + le
+su             |  on
+sul            |  su + il
+sullo          |  su + lo
+sui            |  su + i
+sugli          |  su + gli
+sull           |  su + l'
+sugl           |  su + gl'
+sulla          |  su + la
+sulle          |  su + le
+per            |  through, by
+tra            |  among
+contro         |  against
+io             |  I
+tu             |  thou
+lui            |  he
+lei            |  she
+noi            |  we
+voi            |  you
+loro           |  they
+mio            |  my
+mia            |
+miei           |
+mie            |
+tuo            |
+tua            |
+tuoi           |  thy
+tue            |
+suo            |
+sua            |
+suoi           |  his, her
+sue            |
+nostro         |  our
+nostra         |
+nostri         |
+nostre         |
+vostro         |  your
+vostra         |
+vostri         |
+vostre         |
+mi             |  me
+ti             |  thee
+ci             |  us, there
+vi             |  you, there
+lo             |  him, the
+la             |  her, the
+li             |  them
+le             |  them, the
+gli            |  to him, the
+ne             |  from there etc
+il             |  the
+un             |  a
+uno            |  a
+una            |  a
+ma             |  but
+ed             |  and
+se             |  if
+perché         |  why, because
+anche          |  also
+come           |  how
+dov            |  where (as dov')
+dove           |  where
+che            |  who, that
+chi            |  who
+cui            |  whom
+non            |  not
+più            |  more
+quale          |  who, that
+quanto         |  how much
+quanti         |
+quanta         |
+quante         |
+quello         |  that
+quelli         |
+quella         |
+quelle         |
+questo         |  this
+questi         |
+questa         |
+queste         |
+si             |  yes
+tutto          |  all
+tutti          |  all
+
+               |  single letter forms:
+
+a              |  at
+c              |  as c' for ce or ci
+e              |  and
+i              |  the
+l              |  as l'
+o              |  or
+
+               | forms of avere, to have (not including the infinitive):
+
+ho
+hai
+ha
+abbiamo
+avete
+hanno
+abbia
+abbiate
+abbiano
+avrò
+avrai
+avrà
+avremo
+avrete
+avranno
+avrei
+avresti
+avrebbe
+avremmo
+avreste
+avrebbero
+avevo
+avevi
+aveva
+avevamo
+avevate
+avevano
+ebbi
+avesti
+ebbe
+avemmo
+aveste
+ebbero
+avessi
+avesse
+avessimo
+avessero
+avendo
+avuto
+avuta
+avuti
+avute
+
+               | forms of essere, to be (not including the infinitive):
+sono
+sei
+è
+siamo
+siete
+sia
+siate
+siano
+sarò
+sarai
+sarà
+saremo
+sarete
+saranno
+sarei
+saresti
+sarebbe
+saremmo
+sareste
+sarebbero
+ero
+eri
+era
+eravamo
+eravate
+erano
+fui
+fosti
+fu
+fummo
+foste
+furono
+fossi
+fosse
+fossimo
+fossero
+essendo
+
+               | forms of fare, to do (not including the infinitive, fa, fat-):
+faccio
+fai
+facciamo
+fanno
+faccia
+facciate
+facciano
+farò
+farai
+farà
+faremo
+farete
+faranno
+farei
+faresti
+farebbe
+faremmo
+fareste
+farebbero
+facevo
+facevi
+faceva
+facevamo
+facevate
+facevano
+feci
+facesti
+fece
+facemmo
+faceste
+fecero
+facessi
+facesse
+facessimo
+facessero
+facendo
+
+               | forms of stare, to be (not including the infinitive):
+sto
+stai
+sta
+stiamo
+stanno
+stia
+stiate
+stiano
+starò
+starai
+starà
+staremo
+starete
+staranno
+starei
+staresti
+starebbe
+staremmo
+stareste
+starebbero
+stavo
+stavi
+stava
+stavamo
+stavate
+stavano
+stetti
+stesti
+stette
+stemmo
+steste
+stettero
+stessi
+stesse
+stessimo
+stessero
+stando
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_ja.txt b/zookeeper/solr/collection1/conf/lang/stopwords_ja.txt

new file mode 100644 (file)

index 0000000..d4321be
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_ja.txt
@@ -0,0 +1,127 @@
+#
+# This file defines a stopword set for Japanese.
+#
+# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
+# Punctuation characters and frequent kanji have mostly been left out.  See LUCENE-3745
+# for frequency lists, etc. that can be useful for making your own set (if desired)
+#
+# Note that there is an overlap between these stopwords and the terms stopped when used
+# in combination with the JapanesePartOfSpeechStopFilter.  When editing this file, note
+# that comments are not allowed on the same line as stopwords.
+#
+# Also note that stopping is done in a case-insensitive manner.  Change your StopFilter
+# configuration if you need case-sensitive stopping.  Lastly, note that stopping is done
+# using the same character width as the entries in this file.  Since this StopFilter is
+# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
+# entries to be in half-width and your kana entries to be in full-width.
+#
+の
+に
+は
+を
+た
+が
+で
+て
+と
+し
+れ
+さ
+ある
+いる
+も
+する
+から
+な
+こと
+として
+い
+や
+れる
+など
+なっ
+ない
+この
+ため
+その
+あっ
+よう
+また
+もの
+という
+あり
+まで
+られ
+なる
+へ
+か
+だ
+これ
+によって
+により
+おり
+より
+による
+ず
+なり
+られる
+において
+ば
+なかっ
+なく
+しかし
+について
+せ
+だっ
+その後
+できる
+それ
+う
+ので
+なお
+のみ
+でき
+き
+つ
+における
+および
+いう
+さらに
+でも
+ら
+たり
+その他
+に関する
+たち
+ます
+ん
+なら
+に対して
+特に
+せる
+及び
+これら
+とき
+では
+にて
+ほか
+ながら
+うち
+そして
+とともに
+ただし
+かつて
+それぞれ
+または
+お
+ほど
+ものの
+に対する
+ほとんど
+と共に
+といった
+です
+とも
+ところ
+ここ
+##### End of file
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_lv.txt b/zookeeper/solr/collection1/conf/lang/stopwords_lv.txt

new file mode 100644 (file)

index 0000000..e21a23c
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_lv.txt
@@ -0,0 +1,172 @@
+# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
+# the original list of over 800 forms was refined: 
+#   pronouns, adverbs, interjections were removed
+# 
+# prepositions
+aiz
+ap
+ar
+apakš
+ārpus
+augšpus
+bez
+caur
+dēļ
+gar
+iekš
+iz
+kopš
+labad
+lejpus
+līdz
+no
+otrpus
+pa
+par
+pār
+pēc
+pie
+pirms
+pret
+priekš
+starp
+šaipus
+uz
+viņpus
+virs
+virspus
+zem
+apakšpus
+# Conjunctions
+un
+bet
+jo
+ja
+ka
+lai
+tomēr
+tikko
+turpretī
+arī
+kaut
+gan
+tādēļ
+tā
+ne
+tikvien
+vien
+kā
+ir
+te
+vai
+kamēr
+# Particles
+ar
+diezin
+droši
+diemžēl
+nebūt
+ik
+it
+taču
+nu
+pat
+tiklab
+iekšpus
+nedz
+tik
+nevis
+turpretim
+jeb
+iekam
+iekām
+iekāms
+kolīdz
+līdzko
+tiklīdz
+jebšu
+tālab
+tāpēc
+nekā
+itin
+jā
+jau
+jel
+nē
+nezin
+tad
+tikai
+vis
+tak
+iekams
+vien
+# modal verbs
+būt  
+biju 
+biji
+bija
+bijām
+bijāt
+esmu
+esi
+esam
+esat 
+būšu     
+būsi
+būs
+būsim
+būsiet
+tikt
+tiku
+tiki
+tika
+tikām
+tikāt
+tieku
+tiec
+tiek
+tiekam
+tiekat
+tikšu
+tiks
+tiksim
+tiksiet
+tapt
+tapi
+tapāt
+topat
+tapšu
+tapsi
+taps
+tapsim
+tapsiet
+kļūt
+kļuvu
+kļuvi
+kļuva
+kļuvām
+kļuvāt
+kļūstu
+kļūsti
+kļūst
+kļūstam
+kļūstat
+kļūšu
+kļūsi
+kļūs
+kļūsim
+kļūsiet
+# verbs
+varēt
+varēju
+varējām
+varēšu
+varēsim
+var
+varēji
+varējāt
+varēsi
+varēsiet
+varat
+varēja
+varēs
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_nl.txt b/zookeeper/solr/collection1/conf/lang/stopwords_nl.txt

new file mode 100644 (file)

index 0000000..f4d61f5
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_nl.txt
@@ -0,0 +1,117 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+
+ | A Dutch stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large sample of Dutch text.
+
+ | Dutch stop words frequently exhibit homonym clashes. These are indicated
+ | clearly below.
+
+de             |  the
+en             |  and
+van            |  of, from
+ik             |  I, the ego
+te             |  (1) chez, at etc, (2) to, (3) too
+dat            |  that, which
+die            |  that, those, who, which
+in             |  in, inside
+een            |  a, an, one
+hij            |  he
+het            |  the, it
+niet           |  not, nothing, naught
+zijn           |  (1) to be, being, (2) his, one's, its
+is             |  is
+was            |  (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
+op             |  on, upon, at, in, up, used up
+aan            |  on, upon, to (as dative)
+met            |  with, by
+als            |  like, such as, when
+voor           |  (1) before, in front of, (2) furrow
+had            |  had, past tense all persons sing. of 'hebben' (have)
+er             |  there
+maar           |  but, only
+om             |  round, about, for etc
+hem            |  him
+dan            |  then
+zou            |  should/would, past tense all persons sing. of 'zullen'
+of             |  or, whether, if
+wat            |  what, something, anything
+mijn           |  possessive and noun 'mine'
+men            |  people, 'one'
+dit            |  this
+zo             |  so, thus, in this way
+door           |  through, by
+over           |  over, across
+ze             |  she, her, they, them
+zich           |  oneself
+bij            |  (1) a bee, (2) by, near, at
+ook            |  also, too
+tot            |  till, until
+je             |  you
+mij            |  me
+uit            |  out of, from
+der            |  Old Dutch form of 'van der' still found in surnames
+daar           |  (1) there, (2) because
+haar           |  (1) her, their, them, (2) hair
+naar           |  (1) unpleasant, unwell etc, (2) towards, (3) as
+heb            |  present first person sing. of 'to have'
+hoe            |  how, why
+heeft          |  present third person sing. of 'to have'
+hebben         |  'to have' and various parts thereof
+deze           |  this
+u              |  you
+want           |  (1) for, (2) mitten, (3) rigging
+nog            |  yet, still
+zal            |  'shall', first and third person sing. of verb 'zullen' (will)
+me             |  me
+zij            |  she, they
+nu             |  now
+ge             |  'thou', still used in Belgium and south Netherlands
+geen           |  none
+omdat          |  because
+iets           |  something, somewhat
+worden         |  to become, grow, get
+toch           |  yet, still
+al             |  all, every, each
+waren          |  (1) 'were', (2) to wander, (3) wares
+veel           |  much, many
+meer           |  (1) more, (2) lake
+doen           |  to do, to make
+toen           |  then, when
+moet           |  noun 'spot/mote' and present form of 'to must'
+ben            |  (1) am, (2) 'are' in interrogative second person singular of 'to be'
+zonder         |  without
+kan            |  noun 'can' and present form of 'to be able'
+hun            |  their, them
+dus            |  so, consequently
+alles          |  all, everything, anything
+onder          |  under, beneath
+ja             |  yes, of course
+eens           |  once, one day
+hier           |  here
+wie            |  who
+werd           |  imperfect third person sing. of 'become'
+altijd         |  always
+doch           |  yet, but etc
+wordt          |  present third person sing. of 'become'
+wezen          |  (1) to be, (2) 'been' as in 'been fishing', (3) orphans
+kunnen         |  to be able
+ons            |  us/our
+zelf           |  self
+tegen          |  against, towards, at
+na             |  after, near
+reeds          |  already
+wil            |  (1) present tense of 'want', (2) 'will', noun, (3) fender
+kon            |  could; past tense of 'to be able'
+niets          |  nothing
+uw             |  your
+iemand         |  somebody
+geweest        |  been; past participle of 'be'
+andere         |  other
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_no.txt b/zookeeper/solr/collection1/conf/lang/stopwords_no.txt

new file mode 100644 (file)

index 0000000..e76f36e
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_no.txt
@@ -0,0 +1,192 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+
+ | A Norwegian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This stop word list is for the dominant bokmål dialect. Words unique
+ | to nynorsk are marked *.
+
+ | Revised by Jan Bruusgaard <Jan.Bruusgaard@ssb.no>, Jan 2005
+
+og             | and
+i              | in
+jeg            | I
+det            | it/this/that
+at             | to (w. inf.)
+en             | a/an
+et             | a/an
+den            | it/this/that
+til            | to
+er             | is/am/are
+som            | who/that
+på             | on
+de             | they / you(formal)
+med            | with
+han            | he
+av             | of
+ikke           | not
+ikkje          | not *
+der            | there
+så             | so
+var            | was/were
+meg            | me
+seg            | oneself (reflexive)
+men            | but
+ett            | one
+har            | have
+om             | about
+vi             | we
+min            | my
+mitt           | my
+ha             | have
+hadde          | had
+hun            | she
+nå             | now
+over           | over
+da             | when/as
+ved            | by/know
+fra            | from
+du             | you
+ut             | out
+sin            | your
+dem            | them
+oss            | us
+opp            | up
+man            | you/one
+kan            | can
+hans           | his
+hvor           | where
+eller          | or
+hva            | what
+skal           | shall/must
+selv           | self (reflexive)
+sjøl           | self (reflexive)
+her            | here
+alle           | all
+vil            | will
+bli            | become
+ble            | became
+blei           | became *
+blitt          | have become
+kunne          | could
+inn            | in
+når            | when
+være           | be
+kom            | come
+noen           | some
+noe            | some
+ville          | would
+dere           | you
+som            | who/which/that
+deres          | their/theirs
+kun            | only/just
+ja             | yes
+etter          | after
+ned            | down
+skulle         | should
+denne          | this
+for            | for/because
+deg            | you
+si             | hers/his
+sine           | hers/his
+sitt           | hers/his
+mot            | against
+å              | to
+meget          | much
+hvorfor        | why
+dette          | this
+disse          | these/those
+uten           | without
+hvordan        | how
+ingen          | none
+din            | your
+ditt           | your
+blir           | become
+samme          | same
+hvilken        | which
+hvilke         | which (plural)
+sånn           | such a
+inni           | inside/within
+mellom         | between
+vår            | our
+hver           | each
+hvem           | who
+vors           | us/ours
+hvis           | whose
+både           | both
+bare           | only/just
+enn            | than
+fordi          | as/because
+før            | before
+mange          | many
+også           | also
+slik           | just
+vært           | been
+være           | to be
+båe            | both *
+begge          | both
+siden          | since
+dykk           | your *
+dykkar         | yours *
+dei            | they *
+deira          | them *
+deires         | theirs *
+deim           | them *
+di             | your (fem.) *
+då             | as/when *
+eg             | I *
+ein            | a/an *
+eit            | a/an *
+eitt           | a/an *
+elles          | or *
+honom          | him *
+hjå            | at *
+ho             | she *
+hoe            | she *
+henne          | her
+hennar         | her/hers
+hennes         | hers
+hoss           | how *
+hossen         | how *
+ikkje          | not *
+ingi           | noone *
+inkje          | noone *
+korleis        | how *
+korso          | how *
+kva            | what/which *
+kvar           | where *
+kvarhelst      | where *
+kven           | who/whom *
+kvi            | why *
+kvifor         | why *
+me             | we *
+medan          | while *
+mi             | my *
+mine           | my *
+mykje          | much *
+no             | now *
+nokon          | some (masc./neut.) *
+noka           | some (fem.) *
+nokor          | some *
+noko           | some *
+nokre          | some *
+si             | his/hers *
+sia            | since *
+sidan          | since *
+so             | so *
+somt           | some *
+somme          | some *
+um             | about *
+upp            | up *
+vere           | be *
+vore           | was *
+verte          | become *
+vort           | become *
+varte          | became *
+vart           | became *
+
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_pt.txt b/zookeeper/solr/collection1/conf/lang/stopwords_pt.txt

new file mode 100644 (file)

index 0000000..276c1b4
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_pt.txt
@@ -0,0 +1,251 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+
+ | A Portuguese stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de             |  of, from
+a              |  the; to, at; her
+o              |  the; him
+que            |  who, that
+e              |  and
+do             |  de + o
+da             |  de + a
+em             |  in
+um             |  a
+para           |  for
+  | é          from SER
+com            |  with
+não            |  not, no
+uma            |  a
+os             |  the; them
+no             |  em + o
+se             |  himself etc
+na             |  em + a
+por            |  for
+mais           |  more
+as             |  the; them
+dos            |  de + os
+como           |  as, like
+mas            |  but
+  | foi        from SER
+ao             |  a + o
+ele            |  he
+das            |  de + as
+  | tem        from TER
+à              |  a + a
+seu            |  his
+sua            |  her
+ou             |  or
+  | ser        from SER
+quando         |  when
+muito          |  much
+  | há         from HAV
+nos            |  em + os; us
+já             |  already, now
+  | está       from EST
+eu             |  I
+também         |  also
+só             |  only, just
+pelo           |  per + o
+pela           |  per + a
+até            |  up to
+isso           |  that
+ela            |  she
+entre          |  between
+  | era        from SER
+depois         |  after
+sem            |  without
+mesmo          |  same
+aos            |  a + os
+  | ter        from TER
+seus           |  his
+quem           |  whom
+nas            |  em + as
+me             |  me
+esse           |  that
+eles           |  they
+  | estão      from EST
+você           |  you
+  | tinha      from TER
+  | foram      from SER
+essa           |  that
+num            |  em + um
+nem            |  nor
+suas           |  her
+meu            |  my
+às             |  a + as
+minha          |  my
+  | têm        from TER
+numa           |  em + uma
+pelos          |  per + os
+elas           |  they
+  | havia      from HAV
+  | seja       from SER
+qual           |  which
+  | será       from SER
+nós            |  we
+  | tenho      from TER
+lhe            |  to him, her
+deles          |  of them
+essas          |  those
+esses          |  those
+pelas          |  per + as
+este           |  this
+  | fosse      from SER
+dele           |  of him
+
+ | other words. There are many contractions such as naquele = em+aquele,
+ | mo = me+o, but they are rare.
+ | Indefinite article plural forms are also rare.
+
+tu             |  thou
+te             |  thee
+vocês          |  you (plural)
+vos            |  you
+lhes           |  to them
+meus           |  my
+minhas
+teu            |  thy
+tua
+teus
+tuas
+nosso          | our
+nossa
+nossos
+nossas
+
+dela           |  of her
+delas          |  of them
+
+esta           |  this
+estes          |  these
+estas          |  these
+aquele         |  that
+aquela         |  that
+aqueles        |  those
+aquelas        |  those
+isto           |  this
+aquilo         |  that
+
+               | forms of estar, to be (not including the infinitive):
+estou
+está
+estamos
+estão
+estive
+esteve
+estivemos
+estiveram
+estava
+estávamos
+estavam
+estivera
+estivéramos
+esteja
+estejamos
+estejam
+estivesse
+estivéssemos
+estivessem
+estiver
+estivermos
+estiverem
+
+               | forms of haver, to have (not including the infinitive):
+hei
+há
+havemos
+hão
+houve
+houvemos
+houveram
+houvera
+houvéramos
+haja
+hajamos
+hajam
+houvesse
+houvéssemos
+houvessem
+houver
+houvermos
+houverem
+houverei
+houverá
+houveremos
+houverão
+houveria
+houveríamos
+houveriam
+
+               | forms of ser, to be (not including the infinitive):
+sou
+somos
+são
+era
+éramos
+eram
+fui
+foi
+fomos
+foram
+fora
+fôramos
+seja
+sejamos
+sejam
+fosse
+fôssemos
+fossem
+for
+formos
+forem
+serei
+será
+seremos
+serão
+seria
+seríamos
+seriam
+
+               | forms of ter, to have (not including the infinitive):
+tenho
+tem
+temos
+tém
+tinha
+tínhamos
+tinham
+tive
+teve
+tivemos
+tiveram
+tivera
+tivéramos
+tenha
+tenhamos
+tenham
+tivesse
+tivéssemos
+tivessem
+tiver
+tivermos
+tiverem
+terei
+terá
+teremos
+terão
+teria
+teríamos
+teriam
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_ro.txt b/zookeeper/solr/collection1/conf/lang/stopwords_ro.txt

new file mode 100644 (file)

index 0000000..4fdee90
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_ro.txt
@@ -0,0 +1,233 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+acea
+aceasta
+această
+aceea
+acei
+aceia
+acel
+acela
+acele
+acelea
+acest
+acesta
+aceste
+acestea
+aceşti
+aceştia
+acolo
+acum
+ai
+aia
+aibă
+aici
+al
+ăla
+ale
+alea
+ălea
+altceva
+altcineva
+am
+ar
+are
+aş
+aşadar
+asemenea
+asta
+ăsta
+astăzi
+astea
+ăstea
+ăştia
+asupra
+aţi
+au
+avea
+avem
+aveţi
+azi
+bine
+bucur
+bună
+ca
+că
+căci
+când
+care
+cărei
+căror
+cărui
+cât
+câte
+câţi
+către
+câtva
+ce
+cel
+ceva
+chiar
+cînd
+cine
+cineva
+cît
+cîte
+cîţi
+cîtva
+contra
+cu
+cum
+cumva
+curând
+curînd
+da
+dă
+dacă
+dar
+datorită
+de
+deci
+deja
+deoarece
+departe
+deşi
+din
+dinaintea
+dintr
+dintre
+drept
+după
+ea
+ei
+el
+ele
+eram
+este
+eşti
+eu
+face
+fără
+fi
+fie
+fiecare
+fii
+fim
+fiţi
+iar
+ieri
+îi
+îl
+îmi
+împotriva
+în 
+înainte
+înaintea
+încât
+încît
+încotro
+între
+întrucât
+întrucît
+îţi
+la
+lângă
+le
+li
+lîngă
+lor
+lui
+mă
+mâine
+mea
+mei
+mele
+mereu
+meu
+mi
+mine
+mult
+multă
+mulţi
+ne
+nicăieri
+nici
+nimeni
+nişte
+noastră
+noastre
+noi
+noştri
+nostru
+nu
+ori
+oricând
+oricare
+oricât
+orice
+oricînd
+oricine
+oricît
+oricum
+oriunde
+până
+pe
+pentru
+peste
+pînă
+poate
+pot
+prea
+prima
+primul
+prin
+printr
+sa
+să
+săi
+sale
+sau
+său
+se
+şi
+sînt
+sîntem
+sînteţi
+spre
+sub
+sunt
+suntem
+sunteţi
+ta
+tăi
+tale
+tău
+te
+ţi
+ţie
+tine
+toată
+toate
+tot
+toţi
+totuşi
+tu
+un
+una
+unde
+undeva
+unei
+unele
+uneori
+unor
+vă
+vi
+voastră
+voastre
+voi
+voştri
+vostru
+vouă
+vreo
+vreun
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_ru.txt b/zookeeper/solr/collection1/conf/lang/stopwords_ru.txt

new file mode 100644 (file)

index 0000000..6430769
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_ru.txt
@@ -0,0 +1,241 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+
+ | a russian stop word list. comments begin with vertical bar. each stop
+ | word is at the start of a line.
+
+ | this is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | letter `ё' is translated to `е'.
+
+и              | and
+в              | in/into
+во             | alternative form
+не             | not
+что            | what/that
+он             | he
+на             | on/onto
+я              | i
+с              | from
+со             | alternative form
+как            | how
+а              | milder form of `no' (but)
+то             | conjunction and form of `that'
+все            | all
+она            | she
+так            | so, thus
+его            | him
+но             | but
+да             | yes/and
+ты             | thou
+к              | towards, by
+у              | around, chez
+же             | intensifier particle
+вы             | you
+за             | beyond, behind
+бы             | conditional/subj. particle
+по             | up to, along
+только         | only
+ее             | her
+мне            | to me
+было           | it was
+вот            | here is/are, particle
+от             | away from
+меня           | me
+еще            | still, yet, more
+нет            | no, there isn't/aren't
+о              | about
+из             | out of
+ему            | to him
+теперь         | now
+когда          | when
+даже           | even
+ну             | so, well
+вдруг          | suddenly
+ли             | interrogative particle
+если           | if
+уже            | already, but homonym of `narrower'
+или            | or
+ни             | neither
+быть           | to be
+был            | he was
+него           | prepositional form of его
+до             | up to
+вас            | you accusative
+нибудь         | indef. suffix preceded by hyphen
+опять          | again
+уж             | already, but homonym of `adder'
+вам            | to you
+сказал         | he said
+ведь           | particle `after all'
+там            | there
+потом          | then
+себя           | oneself
+ничего         | nothing
+ей             | to her
+может          | usually with `быть' as `maybe'
+они            | they
+тут            | here
+где            | where
+есть           | there is/are
+надо           | got to, must
+ней            | prepositional form of  ей
+для            | for
+мы             | we
+тебя           | thee
+их             | them, their
+чем            | than
+была           | she was
+сам            | self
+чтоб           | in order to
+без            | without
+будто          | as if
+человек        | man, person, one
+чего           | genitive form of `what'
+раз            | once
+тоже           | also
+себе           | to oneself
+под            | beneath
+жизнь          | life
+будет          | will be
+ж              | short form of intensifier particle `же'
+тогда          | then
+кто            | who
+этот           | this
+говорил        | was saying
+того           | genitive form of `that'
+потому         | for that reason
+этого          | genitive form of `this'
+какой          | which
+совсем         | altogether
+ним            | prepositional form of `его', `они'
+здесь          | here
+этом           | prepositional form of `этот'
+один           | one
+почти          | almost
+мой            | my
+тем            | instrumental/dative plural of `тот', `то'
+чтобы          | full form of `in order that'
+нее            | her (acc.)
+кажется        | it seems
+сейчас         | now
+были           | they were
+куда           | where to
+зачем          | why
+сказать        | to say
+всех           | all (acc., gen. preposn. plural)
+никогда        | never
+сегодня        | today
+можно          | possible, one can
+при            | by
+наконец        | finally
+два            | two
+об             | alternative form of `о', about
+другой         | another
+хоть           | even
+после          | after
+над            | above
+больше         | more
+тот            | that one (masc.)
+через          | across, in
+эти            | these
+нас            | us
+про            | about
+всего          | in all, only, of all
+них            | prepositional form of `они' (they)
+какая          | which, feminine
+много          | lots
+разве          | interrogative particle
+сказала        | she said
+три            | three
+эту            | this, acc. fem. sing.
+моя            | my, feminine
+впрочем        | moreover, besides
+хорошо         | good
+свою           | one's own, acc. fem. sing.
+этой           | oblique form of `эта', fem. `this'
+перед          | in front of
+иногда         | sometimes
+лучше          | better
+чуть           | a little
+том            | preposn. form of `that one'
+нельзя         | one must not
+такой          | such a one
+им             | to them
+более          | more
+всегда         | always
+конечно        | of course
+всю            | acc. fem. sing of `all'
+между          | between
+
+
+  | b: some paradigms
+  |
+  | personal pronouns
+  |
+  | я  меня  мне  мной  [мною]
+  | ты  тебя  тебе  тобой  [тобою]
+  | он  его  ему  им  [него, нему, ним]
+  | она  ее  ей  ею  [нее, ней, нею]
+  | оно  его  ему  им  [него, нему, ним]
+  |
+  | мы  нас  нам  нами
+  | вы  вас  вам  вами
+  | они  их  им  ими  [них, ним, ними]
+  |
+  |   себя  себе  собой   [собою]
+  |
+  | demonstrative pronouns: этот (this), тот (that)
+  |
+  | этот  эта  это  эти
+  | этого  эту  это  эти
+  | этого  этой  этого  этих
+  | этому  этой  этому  этим
+  | этим  этой  этим  [этою]  этими
+  | этом  этой  этом  этих
+  |
+  | тот  та  то  те
+  | того  ту  то  те
+  | того  той  того  тех
+  | тому  той  тому  тем
+  | тем  той  тем  [тою]  теми
+  | том  той  том  тех
+  |
+  | determinative pronouns
+  |
+  | (a) весь (all)
+  |
+  | весь  вся  все  все
+  | всего  всю  все  все
+  | всего  всей  всего  всех
+  | всему  всей  всему  всем
+  | всем  всей  всем  [всею]  всеми
+  | всем  всей  всем  всех
+  |
+  | (b) сам (himself etc)
+  |
+  | сам  сама  само  сами
+  | самого саму  само  самих
+  | самого самой самого  самих
+  | самому самой самому  самим
+  | самим  самой  самим  [самою]  самими
+  | самом самой самом  самих
+  |
+  | stems of verbs `to be', `to have', `to do' and modal
+  |
+  | быть  бы  буд  быв  есть  суть
+  | име
+  | дел
+  | мог   мож  мочь
+  | уме
+  | хоч  хот
+  | долж
+  | можн
+  | нужн
+  | нельзя
+
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_sv.txt b/zookeeper/solr/collection1/conf/lang/stopwords_sv.txt

new file mode 100644 (file)

index 0000000..22bddfd
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_sv.txt
@@ -0,0 +1,131 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+
+ | A Swedish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | Swedish stop words occasionally exhibit homonym clashes. For example
+ |  så = so, but also seed. These are indicated clearly below.
+
+och            | and
+det            | it, this/that
+att            | to (with infinitive)
+i              | in, at
+en             | a
+jag            | I
+hon            | she
+som            | who, that
+han            | he
+på             | on
+den            | it, this/that
+med            | with
+var            | where, each
+sig            | him(self) etc
+för            | for
+så             | so (also: seed)
+till           | to
+är             | is
+men            | but
+ett            | a
+om             | if; around, about
+hade           | had
+de             | they, these/those
+av             | of
+icke           | not, no
+mig            | me
+du             | you
+henne          | her
+då             | then, when
+sin            | his
+nu             | now
+har            | have
+inte           | inte någon = no one
+hans           | his
+honom          | him
+skulle         | should, would
+hennes         | her
+där            | there
+min            | my
+man            | one (pronoun)
+ej             | nor
+vid            | at, by, on (also: vast)
+kunde          | could
+något          | some etc
+från           | from, off
+ut             | out
+när            | when
+efter          | after, behind
+upp            | up
+vi             | we
+dem            | them
+vara           | be
+vad            | what
+över           | over
+än             | than
+dig            | you
+kan            | can
+sina           | his
+här            | here
+ha             | have
+mot            | towards
+alla           | all
+under          | under (also: wonder)
+någon          | some etc
+eller          | or (else)
+allt           | all
+mycket         | much
+sedan          | since
+ju             | why
+denna          | this/that
+själv          | myself, yourself etc
+detta          | this/that
+åt             | to
+utan           | without
+varit          | was
+hur            | how
+ingen          | no
+mitt           | my
+ni             | you
+bli            | to be, become
+blev           | from bli
+oss            | us
+din            | thy
+dessa          | these/those
+några          | some etc
+deras          | their
+blir           | from bli
+mina           | my
+samma          | (the) same
+vilken         | who, that
+er             | you, your
+sådan          | such a
+vår            | our
+blivit         | from bli
+dess           | its
+inom           | within
+mellan         | between
+sådant         | such a
+varför         | why
+varje          | each
+vilka          | who, that
+ditt           | thy
+vem            | who
+vilket         | who, that
+sitta          | his
+sådana         | such a
+vart           | each
+dina           | thy
+vars           | whose
+vårt           | our
+våra           | our
+ert            | your
+era            | your
+vilkas         | whose
+
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_th.txt b/zookeeper/solr/collection1/conf/lang/stopwords_th.txt

new file mode 100644 (file)

index 0000000..07f0fab
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_th.txt
@@ -0,0 +1,119 @@
+# Thai stopwords from:
+# "Opinion Detection in Thai Political News Columns
+# Based on Subjectivity Analysis"
+# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
+ไว้
+ไม่
+ไป
+ได้
+ให้
+ใน
+โดย
+แห่ง
+แล้ว
+และ
+แรก
+แบบ
+แต่
+เอง
+เห็น
+เลย
+เริ่ม
+เรา
+เมื่อ
+เพื่อ
+เพราะ
+เป็นการ
+เป็น
+เปิดเผย
+เปิด
+เนื่องจาก
+เดียวกัน
+เดียว
+เช่น
+เฉพาะ
+เคย
+เข้า
+เขา
+อีก
+อาจ
+อะไร
+ออก
+อย่าง
+อยู่
+อยาก
+หาก
+หลาย
+หลังจาก
+หลัง
+หรือ
+หนึ่ง
+ส่วน
+ส่ง
+สุด
+สําหรับ
+ว่า
+วัน
+ลง
+ร่วม
+ราย
+รับ
+ระหว่าง
+รวม
+ยัง
+มี
+มาก
+มา
+พร้อม
+พบ
+ผ่าน
+ผล
+บาง
+น่า
+นี้
+นํา
+นั้น
+นัก
+นอกจาก
+ทุก
+ที่สุด
+ที่
+ทําให้
+ทํา
+ทาง
+ทั้งนี้
+ทั้ง
+ถ้า
+ถูก
+ถึง
+ต้อง
+ต่างๆ
+ต่าง
+ต่อ
+ตาม
+ตั้งแต่
+ตั้ง
+ด้าน
+ด้วย
+ดัง
+ซึ่ง
+ช่วง
+จึง
+จาก
+จัด
+จะ
+คือ
+ความ
+ครั้ง
+คง
+ขึ้น
+ของ
+ขอ
+ขณะ
+ก่อน
+ก็
+การ
+กับ
+กัน
+กว่า
+กล่าว
diff --git a/zookeeper/solr/collection1/conf/lang/stopwords_tr.txt b/zookeeper/solr/collection1/conf/lang/stopwords_tr.txt

new file mode 100644 (file)

index 0000000..84d9408
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/stopwords_tr.txt
@@ -0,0 +1,212 @@
+# Turkish stopwords from LUCENE-559
+# merged with the list from "Information Retrieval on Turkish Texts"
+#   (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
+acaba
+altmış
+altı
+ama
+ancak
+arada
+aslında
+ayrıca
+bana
+bazı
+belki
+ben
+benden
+beni
+benim
+beri
+beş
+bile
+bin
+bir
+birçok
+biri
+birkaç
+birkez
+birşey
+birşeyi
+biz
+bize
+bizden
+bizi
+bizim
+böyle
+böylece
+bu
+buna
+bunda
+bundan
+bunlar
+bunları
+bunların
+bunu
+bunun
+burada
+çok
+çünkü
+da
+daha
+dahi
+de
+defa
+değil
+diğer
+diye
+doksan
+dokuz
+dolayı
+dolayısıyla
+dört
+edecek
+eden
+ederek
+edilecek
+ediliyor
+edilmesi
+ediyor
+eğer
+elli
+en
+etmesi
+etti
+ettiği
+ettiğini
+gibi
+göre
+halen
+hangi
+hatta
+hem
+henüz
+hep
+hepsi
+her
+herhangi
+herkesin
+hiç
+hiçbir
+için
+iki
+ile
+ilgili
+ise
+işte
+itibaren
+itibariyle
+kadar
+karşın
+katrilyon
+kendi
+kendilerine
+kendini
+kendisi
+kendisine
+kendisini
+kez
+ki
+kim
+kimden
+kime
+kimi
+kimse
+kırk
+milyar
+milyon
+mu
+mü
+mı
+nasıl
+ne
+neden
+nedenle
+nerde
+nerede
+nereye
+niye
+niçin
+o
+olan
+olarak
+oldu
+olduğu
+olduğunu
+olduklarını
+olmadı
+olmadığı
+olmak
+olması
+olmayan
+olmaz
+olsa
+olsun
+olup
+olur
+olursa
+oluyor
+on
+ona
+ondan
+onlar
+onlardan
+onları
+onların
+onu
+onun
+otuz
+oysa
+öyle
+pek
+rağmen
+sadece
+sanki
+sekiz
+seksen
+sen
+senden
+seni
+senin
+siz
+sizden
+sizi
+sizin
+şey
+şeyden
+şeyi
+şeyler
+şöyle
+şu
+şuna
+şunda
+şundan
+şunları
+şunu
+tarafından
+trilyon
+tüm
+üç
+üzere
+var
+vardı
+ve
+veya
+ya
+yani
+yapacak
+yapılan
+yapılması
+yapıyor
+yapmak
+yaptı
+yaptığı
+yaptığını
+yaptıkları
+yedi
+yerine
+yetmiş
+yine
+yirmi
+yoksa
+yüz
+zaten
diff --git a/zookeeper/solr/collection1/conf/lang/userdict_ja.txt b/zookeeper/solr/collection1/conf/lang/userdict_ja.txt

new file mode 100644 (file)

index 0000000..6f0368e
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/lang/userdict_ja.txt
@@ -0,0 +1,29 @@
+#
+# This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
+#
+# Add entries to this file in order to override the statistical model in terms
+# of segmentation, readings and part-of-speech tags.  Notice that entries do
+# not have weights since they are always used when found.  This is by design
+# in order to maximize ease of use.
+#
+# Entries are defined using the following CSV format:
+#  <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag>
+#
+# Notice that a single half-width space separates tokens and readings, and
+# that the number of tokens and readings must match exactly.
+#
+# Also notice that the behavior of multiple entries with the same <text> is undefined.
+#
+# Whitespace only lines are ignored.  Comments are not allowed on entry lines.
+#
+
+# Custom segmentation for kanji compounds
+日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞
+関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞
+
+# Custom segmentation for compound katakana
+トートバッグ,トート バッグ,トート バッグ,かずカナ名詞
+ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞
+
+# Custom reading for former sumo wrestler
+朝青龍,朝青龍,アサショウリュウ,カスタム人名
diff --git a/zookeeper/solr/collection1/conf/mapping-FoldToASCII.txt b/zookeeper/solr/collection1/conf/mapping-FoldToASCII.txt

new file mode 100644 (file)

index 0000000..9a84b6e
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/mapping-FoldToASCII.txt
@@ -0,0 +1,3813 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# This map converts alphabetic, numeric, and symbolic Unicode characters
+# which are not in the first 127 ASCII characters (the "Basic Latin" Unicode
+# block) into their ASCII equivalents, if one exists.
+#
+# Characters from the following Unicode blocks are converted; however, only
+# those characters with reasonable ASCII alternatives are converted:
+#
+# - C1 Controls and Latin-1 Supplement: http://www.unicode.org/charts/PDF/U0080.pdf
+# - Latin Extended-A: http://www.unicode.org/charts/PDF/U0100.pdf
+# - Latin Extended-B: http://www.unicode.org/charts/PDF/U0180.pdf
+# - Latin Extended Additional: http://www.unicode.org/charts/PDF/U1E00.pdf
+# - Latin Extended-C: http://www.unicode.org/charts/PDF/U2C60.pdf
+# - Latin Extended-D: http://www.unicode.org/charts/PDF/UA720.pdf
+# - IPA Extensions: http://www.unicode.org/charts/PDF/U0250.pdf
+# - Phonetic Extensions: http://www.unicode.org/charts/PDF/U1D00.pdf
+# - Phonetic Extensions Supplement: http://www.unicode.org/charts/PDF/U1D80.pdf
+# - General Punctuation: http://www.unicode.org/charts/PDF/U2000.pdf
+# - Superscripts and Subscripts: http://www.unicode.org/charts/PDF/U2070.pdf
+# - Enclosed Alphanumerics: http://www.unicode.org/charts/PDF/U2460.pdf
+# - Dingbats: http://www.unicode.org/charts/PDF/U2700.pdf
+# - Supplemental Punctuation: http://www.unicode.org/charts/PDF/U2E00.pdf
+# - Alphabetic Presentation Forms: http://www.unicode.org/charts/PDF/UFB00.pdf
+# - Halfwidth and Fullwidth Forms: http://www.unicode.org/charts/PDF/UFF00.pdf
+#  
+# See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
+#
+# The set of character conversions supported by this map is a superset of
+# those supported by the map represented by mapping-ISOLatin1Accent.txt.
+#
+# See the bottom of this file for the Perl script used to generate the contents
+# of this file (without this header) from ASCIIFoldingFilter.java.
+
+
+# Syntax:
+#   "source" => "target"
+#     "source".length() > 0 (source cannot be empty.)
+#     "target".length() >= 0 (target can be empty.)
+
+
+# À  [LATIN CAPITAL LETTER A WITH GRAVE]
+"\u00C0" => "A"
+
+# Á  [LATIN CAPITAL LETTER A WITH ACUTE]
+"\u00C1" => "A"
+
+# Â  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX]
+"\u00C2" => "A"
+
+# Ã  [LATIN CAPITAL LETTER A WITH TILDE]
+"\u00C3" => "A"
+
+# Ä  [LATIN CAPITAL LETTER A WITH DIAERESIS]
+"\u00C4" => "A"
+
+# Å  [LATIN CAPITAL LETTER A WITH RING ABOVE]
+"\u00C5" => "A"
+
+# Ā  [LATIN CAPITAL LETTER A WITH MACRON]
+"\u0100" => "A"
+
+# Ă  [LATIN CAPITAL LETTER A WITH BREVE]
+"\u0102" => "A"
+
+# Ą  [LATIN CAPITAL LETTER A WITH OGONEK]
+"\u0104" => "A"
+
+# Ə  http://en.wikipedia.org/wiki/Schwa  [LATIN CAPITAL LETTER SCHWA]
+"\u018F" => "A"
+
+# Ǎ  [LATIN CAPITAL LETTER A WITH CARON]
+"\u01CD" => "A"
+
+# Ǟ  [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON]
+"\u01DE" => "A"
+
+# Ǡ  [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON]
+"\u01E0" => "A"
+
+# Ǻ  [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE]
+"\u01FA" => "A"
+
+# Ȁ  [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE]
+"\u0200" => "A"
+
+# Ȃ  [LATIN CAPITAL LETTER A WITH INVERTED BREVE]
+"\u0202" => "A"
+
+# Ȧ  [LATIN CAPITAL LETTER A WITH DOT ABOVE]
+"\u0226" => "A"
+
+# Ⱥ  [LATIN CAPITAL LETTER A WITH STROKE]
+"\u023A" => "A"
+
+# ᴀ  [LATIN LETTER SMALL CAPITAL A]
+"\u1D00" => "A"
+
+# Ḁ  [LATIN CAPITAL LETTER A WITH RING BELOW]
+"\u1E00" => "A"
+
+# Ạ  [LATIN CAPITAL LETTER A WITH DOT BELOW]
+"\u1EA0" => "A"
+
+# Ả  [LATIN CAPITAL LETTER A WITH HOOK ABOVE]
+"\u1EA2" => "A"
+
+# Ấ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE]
+"\u1EA4" => "A"
+
+# Ầ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE]
+"\u1EA6" => "A"
+
+# Ẩ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
+"\u1EA8" => "A"
+
+# Ẫ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE]
+"\u1EAA" => "A"
+
+# Ậ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
+"\u1EAC" => "A"
+
+# Ắ  [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE]
+"\u1EAE" => "A"
+
+# Ằ  [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE]
+"\u1EB0" => "A"
+
+# Ẳ  [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE]
+"\u1EB2" => "A"
+
+# Ẵ  [LATIN CAPITAL LETTER A WITH BREVE AND TILDE]
+"\u1EB4" => "A"
+
+# Ặ  [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW]
+"\u1EB6" => "A"
+
+# Ⓐ  [CIRCLED LATIN CAPITAL LETTER A]
+"\u24B6" => "A"
+
+# Ａ  [FULLWIDTH LATIN CAPITAL LETTER A]
+"\uFF21" => "A"
+
+# à  [LATIN SMALL LETTER A WITH GRAVE]
+"\u00E0" => "a"
+
+# á  [LATIN SMALL LETTER A WITH ACUTE]
+"\u00E1" => "a"
+
+# â  [LATIN SMALL LETTER A WITH CIRCUMFLEX]
+"\u00E2" => "a"
+
+# ã  [LATIN SMALL LETTER A WITH TILDE]
+"\u00E3" => "a"
+
+# ä  [LATIN SMALL LETTER A WITH DIAERESIS]
+"\u00E4" => "a"
+
+# å  [LATIN SMALL LETTER A WITH RING ABOVE]
+"\u00E5" => "a"
+
+# ā  [LATIN SMALL LETTER A WITH MACRON]
+"\u0101" => "a"
+
+# ă  [LATIN SMALL LETTER A WITH BREVE]
+"\u0103" => "a"
+
+# ą  [LATIN SMALL LETTER A WITH OGONEK]
+"\u0105" => "a"
+
+# ǎ  [LATIN SMALL LETTER A WITH CARON]
+"\u01CE" => "a"
+
+# ǟ  [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON]
+"\u01DF" => "a"
+
+# ǡ  [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON]
+"\u01E1" => "a"
+
+# ǻ  [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE]
+"\u01FB" => "a"
+
+# ȁ  [LATIN SMALL LETTER A WITH DOUBLE GRAVE]
+"\u0201" => "a"
+
+# ȃ  [LATIN SMALL LETTER A WITH INVERTED BREVE]
+"\u0203" => "a"
+
+# ȧ  [LATIN SMALL LETTER A WITH DOT ABOVE]
+"\u0227" => "a"
+
+# ɐ  [LATIN SMALL LETTER TURNED A]
+"\u0250" => "a"
+
+# ə  [LATIN SMALL LETTER SCHWA]
+"\u0259" => "a"
+
+# ɚ  [LATIN SMALL LETTER SCHWA WITH HOOK]
+"\u025A" => "a"
+
+# ᶏ  [LATIN SMALL LETTER A WITH RETROFLEX HOOK]
+"\u1D8F" => "a"
+
+# ᶕ  [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK]
+"\u1D95" => "a"
+
+# ḁ  [LATIN SMALL LETTER A WITH RING BELOW]
+"\u1E01" => "a"
+
+# ẚ  [LATIN SMALL LETTER A WITH RIGHT HALF RING]
+"\u1E9A" => "a"
+
+# ạ  [LATIN SMALL LETTER A WITH DOT BELOW]
+"\u1EA1" => "a"
+
+# ả  [LATIN SMALL LETTER A WITH HOOK ABOVE]
+"\u1EA3" => "a"
+
+# ấ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE]
+"\u1EA5" => "a"
+
+# ầ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE]
+"\u1EA7" => "a"
+
+# ẩ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
+"\u1EA9" => "a"
+
+# ẫ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE]
+"\u1EAB" => "a"
+
+# ậ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
+"\u1EAD" => "a"
+
+# ắ  [LATIN SMALL LETTER A WITH BREVE AND ACUTE]
+"\u1EAF" => "a"
+
+# ằ  [LATIN SMALL LETTER A WITH BREVE AND GRAVE]
+"\u1EB1" => "a"
+
+# ẳ  [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE]
+"\u1EB3" => "a"
+
+# ẵ  [LATIN SMALL LETTER A WITH BREVE AND TILDE]
+"\u1EB5" => "a"
+
+# ặ  [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW]
+"\u1EB7" => "a"
+
+# ₐ  [LATIN SUBSCRIPT SMALL LETTER A]
+"\u2090" => "a"
+
+# ₔ  [LATIN SUBSCRIPT SMALL LETTER SCHWA]
+"\u2094" => "a"
+
+# ⓐ  [CIRCLED LATIN SMALL LETTER A]
+"\u24D0" => "a"
+
+# ⱥ  [LATIN SMALL LETTER A WITH STROKE]
+"\u2C65" => "a"
+
+# Ɐ  [LATIN CAPITAL LETTER TURNED A]
+"\u2C6F" => "a"
+
+# ａ  [FULLWIDTH LATIN SMALL LETTER A]
+"\uFF41" => "a"
+
+# Ꜳ  [LATIN CAPITAL LETTER AA]
+"\uA732" => "AA"
+
+# Æ  [LATIN CAPITAL LETTER AE]
+"\u00C6" => "AE"
+
+# Ǣ  [LATIN CAPITAL LETTER AE WITH MACRON]
+"\u01E2" => "AE"
+
+# Ǽ  [LATIN CAPITAL LETTER AE WITH ACUTE]
+"\u01FC" => "AE"
+
+# ᴁ  [LATIN LETTER SMALL CAPITAL AE]
+"\u1D01" => "AE"
+
+# Ꜵ  [LATIN CAPITAL LETTER AO]
+"\uA734" => "AO"
+
+# Ꜷ  [LATIN CAPITAL LETTER AU]
+"\uA736" => "AU"
+
+# Ꜹ  [LATIN CAPITAL LETTER AV]
+"\uA738" => "AV"
+
+# Ꜻ  [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR]
+"\uA73A" => "AV"
+
+# Ꜽ  [LATIN CAPITAL LETTER AY]
+"\uA73C" => "AY"
+
+# ⒜  [PARENTHESIZED LATIN SMALL LETTER A]
+"\u249C" => "(a)"
+
+# ꜳ  [LATIN SMALL LETTER AA]
+"\uA733" => "aa"
+
+# æ  [LATIN SMALL LETTER AE]
+"\u00E6" => "ae"
+
+# ǣ  [LATIN SMALL LETTER AE WITH MACRON]
+"\u01E3" => "ae"
+
+# ǽ  [LATIN SMALL LETTER AE WITH ACUTE]
+"\u01FD" => "ae"
+
+# ᴂ  [LATIN SMALL LETTER TURNED AE]
+"\u1D02" => "ae"
+
+# ꜵ  [LATIN SMALL LETTER AO]
+"\uA735" => "ao"
+
+# ꜷ  [LATIN SMALL LETTER AU]
+"\uA737" => "au"
+
+# ꜹ  [LATIN SMALL LETTER AV]
+"\uA739" => "av"
+
+# ꜻ  [LATIN SMALL LETTER AV WITH HORIZONTAL BAR]
+"\uA73B" => "av"
+
+# ꜽ  [LATIN SMALL LETTER AY]
+"\uA73D" => "ay"
+
+# Ɓ  [LATIN CAPITAL LETTER B WITH HOOK]
+"\u0181" => "B"
+
+# Ƃ  [LATIN CAPITAL LETTER B WITH TOPBAR]
+"\u0182" => "B"
+
+# Ƀ  [LATIN CAPITAL LETTER B WITH STROKE]
+"\u0243" => "B"
+
+# ʙ  [LATIN LETTER SMALL CAPITAL B]
+"\u0299" => "B"
+
+# ᴃ  [LATIN LETTER SMALL CAPITAL BARRED B]
+"\u1D03" => "B"
+
+# Ḃ  [LATIN CAPITAL LETTER B WITH DOT ABOVE]
+"\u1E02" => "B"
+
+# Ḅ  [LATIN CAPITAL LETTER B WITH DOT BELOW]
+"\u1E04" => "B"
+
+# Ḇ  [LATIN CAPITAL LETTER B WITH LINE BELOW]
+"\u1E06" => "B"
+
+# Ⓑ  [CIRCLED LATIN CAPITAL LETTER B]
+"\u24B7" => "B"
+
+# Ｂ  [FULLWIDTH LATIN CAPITAL LETTER B]
+"\uFF22" => "B"
+
+# ƀ  [LATIN SMALL LETTER B WITH STROKE]
+"\u0180" => "b"
+
+# ƃ  [LATIN SMALL LETTER B WITH TOPBAR]
+"\u0183" => "b"
+
+# ɓ  [LATIN SMALL LETTER B WITH HOOK]
+"\u0253" => "b"
+
+# ᵬ  [LATIN SMALL LETTER B WITH MIDDLE TILDE]
+"\u1D6C" => "b"
+
+# ᶀ  [LATIN SMALL LETTER B WITH PALATAL HOOK]
+"\u1D80" => "b"
+
+# ḃ  [LATIN SMALL LETTER B WITH DOT ABOVE]
+"\u1E03" => "b"
+
+# ḅ  [LATIN SMALL LETTER B WITH DOT BELOW]
+"\u1E05" => "b"
+
+# ḇ  [LATIN SMALL LETTER B WITH LINE BELOW]
+"\u1E07" => "b"
+
+# ⓑ  [CIRCLED LATIN SMALL LETTER B]
+"\u24D1" => "b"
+
+# ｂ  [FULLWIDTH LATIN SMALL LETTER B]
+"\uFF42" => "b"
+
+# ⒝  [PARENTHESIZED LATIN SMALL LETTER B]
+"\u249D" => "(b)"
+
+# Ç  [LATIN CAPITAL LETTER C WITH CEDILLA]
+"\u00C7" => "C"
+
+# Ć  [LATIN CAPITAL LETTER C WITH ACUTE]
+"\u0106" => "C"
+
+# Ĉ  [LATIN CAPITAL LETTER C WITH CIRCUMFLEX]
+"\u0108" => "C"
+
+# Ċ  [LATIN CAPITAL LETTER C WITH DOT ABOVE]
+"\u010A" => "C"
+
+# Č  [LATIN CAPITAL LETTER C WITH CARON]
+"\u010C" => "C"
+
+# Ƈ  [LATIN CAPITAL LETTER C WITH HOOK]
+"\u0187" => "C"
+
+# Ȼ  [LATIN CAPITAL LETTER C WITH STROKE]
+"\u023B" => "C"
+
+# ʗ  [LATIN LETTER STRETCHED C]
+"\u0297" => "C"
+
+# ᴄ  [LATIN LETTER SMALL CAPITAL C]
+"\u1D04" => "C"
+
+# Ḉ  [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE]
+"\u1E08" => "C"
+
+# Ⓒ  [CIRCLED LATIN CAPITAL LETTER C]
+"\u24B8" => "C"
+
+# Ｃ  [FULLWIDTH LATIN CAPITAL LETTER C]
+"\uFF23" => "C"
+
+# ç  [LATIN SMALL LETTER C WITH CEDILLA]
+"\u00E7" => "c"
+
+# ć  [LATIN SMALL LETTER C WITH ACUTE]
+"\u0107" => "c"
+
+# ĉ  [LATIN SMALL LETTER C WITH CIRCUMFLEX]
+"\u0109" => "c"
+
+# ċ  [LATIN SMALL LETTER C WITH DOT ABOVE]
+"\u010B" => "c"
+
+# č  [LATIN SMALL LETTER C WITH CARON]
+"\u010D" => "c"
+
+# ƈ  [LATIN SMALL LETTER C WITH HOOK]
+"\u0188" => "c"
+
+# ȼ  [LATIN SMALL LETTER C WITH STROKE]
+"\u023C" => "c"
+
+# ɕ  [LATIN SMALL LETTER C WITH CURL]
+"\u0255" => "c"
+
+# ḉ  [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE]
+"\u1E09" => "c"
+
+# ↄ  [LATIN SMALL LETTER REVERSED C]
+"\u2184" => "c"
+
+# ⓒ  [CIRCLED LATIN SMALL LETTER C]
+"\u24D2" => "c"
+
+# Ꜿ  [LATIN CAPITAL LETTER REVERSED C WITH DOT]
+"\uA73E" => "c"
+
+# ꜿ  [LATIN SMALL LETTER REVERSED C WITH DOT]
+"\uA73F" => "c"
+
+# ｃ  [FULLWIDTH LATIN SMALL LETTER C]
+"\uFF43" => "c"
+
+# ⒞  [PARENTHESIZED LATIN SMALL LETTER C]
+"\u249E" => "(c)"
+
+# Ð  [LATIN CAPITAL LETTER ETH]
+"\u00D0" => "D"
+
+# Ď  [LATIN CAPITAL LETTER D WITH CARON]
+"\u010E" => "D"
+
+# Đ  [LATIN CAPITAL LETTER D WITH STROKE]
+"\u0110" => "D"
+
+# Ɖ  [LATIN CAPITAL LETTER AFRICAN D]
+"\u0189" => "D"
+
+# Ɗ  [LATIN CAPITAL LETTER D WITH HOOK]
+"\u018A" => "D"
+
+# Ƌ  [LATIN CAPITAL LETTER D WITH TOPBAR]
+"\u018B" => "D"
+
+# ᴅ  [LATIN LETTER SMALL CAPITAL D]
+"\u1D05" => "D"
+
+# ᴆ  [LATIN LETTER SMALL CAPITAL ETH]
+"\u1D06" => "D"
+
+# Ḋ  [LATIN CAPITAL LETTER D WITH DOT ABOVE]
+"\u1E0A" => "D"
+
+# Ḍ  [LATIN CAPITAL LETTER D WITH DOT BELOW]
+"\u1E0C" => "D"
+
+# Ḏ  [LATIN CAPITAL LETTER D WITH LINE BELOW]
+"\u1E0E" => "D"
+
+# Ḑ  [LATIN CAPITAL LETTER D WITH CEDILLA]
+"\u1E10" => "D"
+
+# Ḓ  [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW]
+"\u1E12" => "D"
+
+# Ⓓ  [CIRCLED LATIN CAPITAL LETTER D]
+"\u24B9" => "D"
+
+# Ꝺ  [LATIN CAPITAL LETTER INSULAR D]
+"\uA779" => "D"
+
+# Ｄ  [FULLWIDTH LATIN CAPITAL LETTER D]
+"\uFF24" => "D"
+
+# ð  [LATIN SMALL LETTER ETH]
+"\u00F0" => "d"
+
+# ď  [LATIN SMALL LETTER D WITH CARON]
+"\u010F" => "d"
+
+# đ  [LATIN SMALL LETTER D WITH STROKE]
+"\u0111" => "d"
+
+# ƌ  [LATIN SMALL LETTER D WITH TOPBAR]
+"\u018C" => "d"
+
+# ȡ  [LATIN SMALL LETTER D WITH CURL]
+"\u0221" => "d"
+
+# ɖ  [LATIN SMALL LETTER D WITH TAIL]
+"\u0256" => "d"
+
+# ɗ  [LATIN SMALL LETTER D WITH HOOK]
+"\u0257" => "d"
+
+# ᵭ  [LATIN SMALL LETTER D WITH MIDDLE TILDE]
+"\u1D6D" => "d"
+
+# ᶁ  [LATIN SMALL LETTER D WITH PALATAL HOOK]
+"\u1D81" => "d"
+
+# ᶑ  [LATIN SMALL LETTER D WITH HOOK AND TAIL]
+"\u1D91" => "d"
+
+# ḋ  [LATIN SMALL LETTER D WITH DOT ABOVE]
+"\u1E0B" => "d"
+
+# ḍ  [LATIN SMALL LETTER D WITH DOT BELOW]
+"\u1E0D" => "d"
+
+# ḏ  [LATIN SMALL LETTER D WITH LINE BELOW]
+"\u1E0F" => "d"
+
+# ḑ  [LATIN SMALL LETTER D WITH CEDILLA]
+"\u1E11" => "d"
+
+# ḓ  [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW]
+"\u1E13" => "d"
+
+# ⓓ  [CIRCLED LATIN SMALL LETTER D]
+"\u24D3" => "d"
+
+# ꝺ  [LATIN SMALL LETTER INSULAR D]
+"\uA77A" => "d"
+
+# ｄ  [FULLWIDTH LATIN SMALL LETTER D]
+"\uFF44" => "d"
+
+# Ǆ  [LATIN CAPITAL LETTER DZ WITH CARON]
+"\u01C4" => "DZ"
+
+# Ǳ  [LATIN CAPITAL LETTER DZ]
+"\u01F1" => "DZ"
+
+# ǅ  [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON]
+"\u01C5" => "Dz"
+
+# ǲ  [LATIN CAPITAL LETTER D WITH SMALL LETTER Z]
+"\u01F2" => "Dz"
+
+# ⒟  [PARENTHESIZED LATIN SMALL LETTER D]
+"\u249F" => "(d)"
+
+# ȸ  [LATIN SMALL LETTER DB DIGRAPH]
+"\u0238" => "db"
+
+# ǆ  [LATIN SMALL LETTER DZ WITH CARON]
+"\u01C6" => "dz"
+
+# ǳ  [LATIN SMALL LETTER DZ]
+"\u01F3" => "dz"
+
+# ʣ  [LATIN SMALL LETTER DZ DIGRAPH]
+"\u02A3" => "dz"
+
+# ʥ  [LATIN SMALL LETTER DZ DIGRAPH WITH CURL]
+"\u02A5" => "dz"
+
+# È  [LATIN CAPITAL LETTER E WITH GRAVE]
+"\u00C8" => "E"
+
+# É  [LATIN CAPITAL LETTER E WITH ACUTE]
+"\u00C9" => "E"
+
+# Ê  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX]
+"\u00CA" => "E"
+
+# Ë  [LATIN CAPITAL LETTER E WITH DIAERESIS]
+"\u00CB" => "E"
+
+# Ē  [LATIN CAPITAL LETTER E WITH MACRON]
+"\u0112" => "E"
+
+# Ĕ  [LATIN CAPITAL LETTER E WITH BREVE]
+"\u0114" => "E"
+
+# Ė  [LATIN CAPITAL LETTER E WITH DOT ABOVE]
+"\u0116" => "E"
+
+# Ę  [LATIN CAPITAL LETTER E WITH OGONEK]
+"\u0118" => "E"
+
+# Ě  [LATIN CAPITAL LETTER E WITH CARON]
+"\u011A" => "E"
+
+# Ǝ  [LATIN CAPITAL LETTER REVERSED E]
+"\u018E" => "E"
+
+# Ɛ  [LATIN CAPITAL LETTER OPEN E]
+"\u0190" => "E"
+
+# Ȅ  [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE]
+"\u0204" => "E"
+
+# Ȇ  [LATIN CAPITAL LETTER E WITH INVERTED BREVE]
+"\u0206" => "E"
+
+# Ȩ  [LATIN CAPITAL LETTER E WITH CEDILLA]
+"\u0228" => "E"
+
+# Ɇ  [LATIN CAPITAL LETTER E WITH STROKE]
+"\u0246" => "E"
+
+# ᴇ  [LATIN LETTER SMALL CAPITAL E]
+"\u1D07" => "E"
+
+# Ḕ  [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE]
+"\u1E14" => "E"
+
+# Ḗ  [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE]
+"\u1E16" => "E"
+
+# Ḙ  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW]
+"\u1E18" => "E"
+
+# Ḛ  [LATIN CAPITAL LETTER E WITH TILDE BELOW]
+"\u1E1A" => "E"
+
+# Ḝ  [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE]
+"\u1E1C" => "E"
+
+# Ẹ  [LATIN CAPITAL LETTER E WITH DOT BELOW]
+"\u1EB8" => "E"
+
+# Ẻ  [LATIN CAPITAL LETTER E WITH HOOK ABOVE]
+"\u1EBA" => "E"
+
+# Ẽ  [LATIN CAPITAL LETTER E WITH TILDE]
+"\u1EBC" => "E"
+
+# Ế  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE]
+"\u1EBE" => "E"
+
+# Ề  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE]
+"\u1EC0" => "E"
+
+# Ể  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
+"\u1EC2" => "E"
+
+# Ễ  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE]
+"\u1EC4" => "E"
+
+# Ệ  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
+"\u1EC6" => "E"
+
+# Ⓔ  [CIRCLED LATIN CAPITAL LETTER E]
+"\u24BA" => "E"
+
+# ⱻ  [LATIN LETTER SMALL CAPITAL TURNED E]
+"\u2C7B" => "E"
+
+# Ｅ  [FULLWIDTH LATIN CAPITAL LETTER E]
+"\uFF25" => "E"
+
+# è  [LATIN SMALL LETTER E WITH GRAVE]
+"\u00E8" => "e"
+
+# é  [LATIN SMALL LETTER E WITH ACUTE]
+"\u00E9" => "e"
+
+# ê  [LATIN SMALL LETTER E WITH CIRCUMFLEX]
+"\u00EA" => "e"
+
+# ë  [LATIN SMALL LETTER E WITH DIAERESIS]
+"\u00EB" => "e"
+
+# ē  [LATIN SMALL LETTER E WITH MACRON]
+"\u0113" => "e"
+
+# ĕ  [LATIN SMALL LETTER E WITH BREVE]
+"\u0115" => "e"
+
+# ė  [LATIN SMALL LETTER E WITH DOT ABOVE]
+"\u0117" => "e"
+
+# ę  [LATIN SMALL LETTER E WITH OGONEK]
+"\u0119" => "e"
+
+# ě  [LATIN SMALL LETTER E WITH CARON]
+"\u011B" => "e"
+
+# ǝ  [LATIN SMALL LETTER TURNED E]
+"\u01DD" => "e"
+
+# ȅ  [LATIN SMALL LETTER E WITH DOUBLE GRAVE]
+"\u0205" => "e"
+
+# ȇ  [LATIN SMALL LETTER E WITH INVERTED BREVE]
+"\u0207" => "e"
+
+# ȩ  [LATIN SMALL LETTER E WITH CEDILLA]
+"\u0229" => "e"
+
+# ɇ  [LATIN SMALL LETTER E WITH STROKE]
+"\u0247" => "e"
+
+# ɘ  [LATIN SMALL LETTER REVERSED E]
+"\u0258" => "e"
+
+# ɛ  [LATIN SMALL LETTER OPEN E]
+"\u025B" => "e"
+
+# ɜ  [LATIN SMALL LETTER REVERSED OPEN E]
+"\u025C" => "e"
+
+# ɝ  [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK]
+"\u025D" => "e"
+
+# ɞ  [LATIN SMALL LETTER CLOSED REVERSED OPEN E]
+"\u025E" => "e"
+
+# ʚ  [LATIN SMALL LETTER CLOSED OPEN E]
+"\u029A" => "e"
+
+# ᴈ  [LATIN SMALL LETTER TURNED OPEN E]
+"\u1D08" => "e"
+
+# ᶒ  [LATIN SMALL LETTER E WITH RETROFLEX HOOK]
+"\u1D92" => "e"
+
+# ᶓ  [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK]
+"\u1D93" => "e"
+
+# ᶔ  [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK]
+"\u1D94" => "e"
+
+# ḕ  [LATIN SMALL LETTER E WITH MACRON AND GRAVE]
+"\u1E15" => "e"
+
+# ḗ  [LATIN SMALL LETTER E WITH MACRON AND ACUTE]
+"\u1E17" => "e"
+
+# ḙ  [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW]
+"\u1E19" => "e"
+
+# ḛ  [LATIN SMALL LETTER E WITH TILDE BELOW]
+"\u1E1B" => "e"
+
+# ḝ  [LATIN SMALL LETTER E WITH CEDILLA AND BREVE]
+"\u1E1D" => "e"
+
+# ẹ  [LATIN SMALL LETTER E WITH DOT BELOW]
+"\u1EB9" => "e"
+
+# ẻ  [LATIN SMALL LETTER E WITH HOOK ABOVE]
+"\u1EBB" => "e"
+
+# ẽ  [LATIN SMALL LETTER E WITH TILDE]
+"\u1EBD" => "e"
+
+# ế  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE]
+"\u1EBF" => "e"
+
+# ề  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE]
+"\u1EC1" => "e"
+
+# ể  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
+"\u1EC3" => "e"
+
+# ễ  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE]
+"\u1EC5" => "e"
+
+# ệ  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
+"\u1EC7" => "e"
+
+# ₑ  [LATIN SUBSCRIPT SMALL LETTER E]
+"\u2091" => "e"
+
+# ⓔ  [CIRCLED LATIN SMALL LETTER E]
+"\u24D4" => "e"
+
+# ⱸ  [LATIN SMALL LETTER E WITH NOTCH]
+"\u2C78" => "e"
+
+# ｅ  [FULLWIDTH LATIN SMALL LETTER E]
+"\uFF45" => "e"
+
+# ⒠  [PARENTHESIZED LATIN SMALL LETTER E]
+"\u24A0" => "(e)"
+
+# Ƒ  [LATIN CAPITAL LETTER F WITH HOOK]
+"\u0191" => "F"
+
+# Ḟ  [LATIN CAPITAL LETTER F WITH DOT ABOVE]
+"\u1E1E" => "F"
+
+# Ⓕ  [CIRCLED LATIN CAPITAL LETTER F]
+"\u24BB" => "F"
+
+# ꜰ  [LATIN LETTER SMALL CAPITAL F]
+"\uA730" => "F"
+
+# Ꝼ  [LATIN CAPITAL LETTER INSULAR F]
+"\uA77B" => "F"
+
+# ꟻ  [LATIN EPIGRAPHIC LETTER REVERSED F]
+"\uA7FB" => "F"
+
+# Ｆ  [FULLWIDTH LATIN CAPITAL LETTER F]
+"\uFF26" => "F"
+
+# ƒ  [LATIN SMALL LETTER F WITH HOOK]
+"\u0192" => "f"
+
+# ᵮ  [LATIN SMALL LETTER F WITH MIDDLE TILDE]
+"\u1D6E" => "f"
+
+# ᶂ  [LATIN SMALL LETTER F WITH PALATAL HOOK]
+"\u1D82" => "f"
+
+# ḟ  [LATIN SMALL LETTER F WITH DOT ABOVE]
+"\u1E1F" => "f"
+
+# ẛ  [LATIN SMALL LETTER LONG S WITH DOT ABOVE]
+"\u1E9B" => "f"
+
+# ⓕ  [CIRCLED LATIN SMALL LETTER F]
+"\u24D5" => "f"
+
+# ꝼ  [LATIN SMALL LETTER INSULAR F]
+"\uA77C" => "f"
+
+# ｆ  [FULLWIDTH LATIN SMALL LETTER F]
+"\uFF46" => "f"
+
+# ⒡  [PARENTHESIZED LATIN SMALL LETTER F]
+"\u24A1" => "(f)"
+
+# ﬀ  [LATIN SMALL LIGATURE FF]
+"\uFB00" => "ff"
+
+# ﬃ  [LATIN SMALL LIGATURE FFI]
+"\uFB03" => "ffi"
+
+# ﬄ  [LATIN SMALL LIGATURE FFL]
+"\uFB04" => "ffl"
+
+# ﬁ  [LATIN SMALL LIGATURE FI]
+"\uFB01" => "fi"
+
+# ﬂ  [LATIN SMALL LIGATURE FL]
+"\uFB02" => "fl"
+
+# Ĝ  [LATIN CAPITAL LETTER G WITH CIRCUMFLEX]
+"\u011C" => "G"
+
+# Ğ  [LATIN CAPITAL LETTER G WITH BREVE]
+"\u011E" => "G"
+
+# Ġ  [LATIN CAPITAL LETTER G WITH DOT ABOVE]
+"\u0120" => "G"
+
+# Ģ  [LATIN CAPITAL LETTER G WITH CEDILLA]
+"\u0122" => "G"
+
+# Ɠ  [LATIN CAPITAL LETTER G WITH HOOK]
+"\u0193" => "G"
+
+# Ǥ  [LATIN CAPITAL LETTER G WITH STROKE]
+"\u01E4" => "G"
+
+# ǥ  [LATIN SMALL LETTER G WITH STROKE]
+"\u01E5" => "G"
+
+# Ǧ  [LATIN CAPITAL LETTER G WITH CARON]
+"\u01E6" => "G"
+
+# ǧ  [LATIN SMALL LETTER G WITH CARON]
+"\u01E7" => "G"
+
+# Ǵ  [LATIN CAPITAL LETTER G WITH ACUTE]
+"\u01F4" => "G"
+
+# ɢ  [LATIN LETTER SMALL CAPITAL G]
+"\u0262" => "G"
+
+# ʛ  [LATIN LETTER SMALL CAPITAL G WITH HOOK]
+"\u029B" => "G"
+
+# Ḡ  [LATIN CAPITAL LETTER G WITH MACRON]
+"\u1E20" => "G"
+
+# Ⓖ  [CIRCLED LATIN CAPITAL LETTER G]
+"\u24BC" => "G"
+
+# Ᵹ  [LATIN CAPITAL LETTER INSULAR G]
+"\uA77D" => "G"
+
+# Ꝿ  [LATIN CAPITAL LETTER TURNED INSULAR G]
+"\uA77E" => "G"
+
+# Ｇ  [FULLWIDTH LATIN CAPITAL LETTER G]
+"\uFF27" => "G"
+
+# ĝ  [LATIN SMALL LETTER G WITH CIRCUMFLEX]
+"\u011D" => "g"
+
+# ğ  [LATIN SMALL LETTER G WITH BREVE]
+"\u011F" => "g"
+
+# ġ  [LATIN SMALL LETTER G WITH DOT ABOVE]
+"\u0121" => "g"
+
+# ģ  [LATIN SMALL LETTER G WITH CEDILLA]
+"\u0123" => "g"
+
+# ǵ  [LATIN SMALL LETTER G WITH ACUTE]
+"\u01F5" => "g"
+
+# ɠ  [LATIN SMALL LETTER G WITH HOOK]
+"\u0260" => "g"
+
+# ɡ  [LATIN SMALL LETTER SCRIPT G]
+"\u0261" => "g"
+
+# ᵷ  [LATIN SMALL LETTER TURNED G]
+"\u1D77" => "g"
+
+# ᵹ  [LATIN SMALL LETTER INSULAR G]
+"\u1D79" => "g"
+
+# ᶃ  [LATIN SMALL LETTER G WITH PALATAL HOOK]
+"\u1D83" => "g"
+
+# ḡ  [LATIN SMALL LETTER G WITH MACRON]
+"\u1E21" => "g"
+
+# ⓖ  [CIRCLED LATIN SMALL LETTER G]
+"\u24D6" => "g"
+
+# ꝿ  [LATIN SMALL LETTER TURNED INSULAR G]
+"\uA77F" => "g"
+
+# ｇ  [FULLWIDTH LATIN SMALL LETTER G]
+"\uFF47" => "g"
+
+# ⒢  [PARENTHESIZED LATIN SMALL LETTER G]
+"\u24A2" => "(g)"
+
+# Ĥ  [LATIN CAPITAL LETTER H WITH CIRCUMFLEX]
+"\u0124" => "H"
+
+# Ħ  [LATIN CAPITAL LETTER H WITH STROKE]
+"\u0126" => "H"
+
+# Ȟ  [LATIN CAPITAL LETTER H WITH CARON]
+"\u021E" => "H"
+
+# ʜ  [LATIN LETTER SMALL CAPITAL H]
+"\u029C" => "H"
+
+# Ḣ  [LATIN CAPITAL LETTER H WITH DOT ABOVE]
+"\u1E22" => "H"
+
+# Ḥ  [LATIN CAPITAL LETTER H WITH DOT BELOW]
+"\u1E24" => "H"
+
+# Ḧ  [LATIN CAPITAL LETTER H WITH DIAERESIS]
+"\u1E26" => "H"
+
+# Ḩ  [LATIN CAPITAL LETTER H WITH CEDILLA]
+"\u1E28" => "H"
+
+# Ḫ  [LATIN CAPITAL LETTER H WITH BREVE BELOW]
+"\u1E2A" => "H"
+
+# Ⓗ  [CIRCLED LATIN CAPITAL LETTER H]
+"\u24BD" => "H"
+
+# Ⱨ  [LATIN CAPITAL LETTER H WITH DESCENDER]
+"\u2C67" => "H"
+
+# Ⱶ  [LATIN CAPITAL LETTER HALF H]
+"\u2C75" => "H"
+
+# Ｈ  [FULLWIDTH LATIN CAPITAL LETTER H]
+"\uFF28" => "H"
+
+# ĥ  [LATIN SMALL LETTER H WITH CIRCUMFLEX]
+"\u0125" => "h"
+
+# ħ  [LATIN SMALL LETTER H WITH STROKE]
+"\u0127" => "h"
+
+# ȟ  [LATIN SMALL LETTER H WITH CARON]
+"\u021F" => "h"
+
+# ɥ  [LATIN SMALL LETTER TURNED H]
+"\u0265" => "h"
+
+# ɦ  [LATIN SMALL LETTER H WITH HOOK]
+"\u0266" => "h"
+
+# ʮ  [LATIN SMALL LETTER TURNED H WITH FISHHOOK]
+"\u02AE" => "h"
+
+# ʯ  [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL]
+"\u02AF" => "h"
+
+# ḣ  [LATIN SMALL LETTER H WITH DOT ABOVE]
+"\u1E23" => "h"
+
+# ḥ  [LATIN SMALL LETTER H WITH DOT BELOW]
+"\u1E25" => "h"
+
+# ḧ  [LATIN SMALL LETTER H WITH DIAERESIS]
+"\u1E27" => "h"
+
+# ḩ  [LATIN SMALL LETTER H WITH CEDILLA]
+"\u1E29" => "h"
+
+# ḫ  [LATIN SMALL LETTER H WITH BREVE BELOW]
+"\u1E2B" => "h"
+
+# ẖ  [LATIN SMALL LETTER H WITH LINE BELOW]
+"\u1E96" => "h"
+
+# ⓗ  [CIRCLED LATIN SMALL LETTER H]
+"\u24D7" => "h"
+
+# ⱨ  [LATIN SMALL LETTER H WITH DESCENDER]
+"\u2C68" => "h"
+
+# ⱶ  [LATIN SMALL LETTER HALF H]
+"\u2C76" => "h"
+
+# ｈ  [FULLWIDTH LATIN SMALL LETTER H]
+"\uFF48" => "h"
+
+# Ƕ  http://en.wikipedia.org/wiki/Hwair  [LATIN CAPITAL LETTER HWAIR]
+"\u01F6" => "HV"
+
+# ⒣  [PARENTHESIZED LATIN SMALL LETTER H]
+"\u24A3" => "(h)"
+
+# ƕ  [LATIN SMALL LETTER HV]
+"\u0195" => "hv"
+
+# Ì  [LATIN CAPITAL LETTER I WITH GRAVE]
+"\u00CC" => "I"
+
+# Í  [LATIN CAPITAL LETTER I WITH ACUTE]
+"\u00CD" => "I"
+
+# Î  [LATIN CAPITAL LETTER I WITH CIRCUMFLEX]
+"\u00CE" => "I"
+
+# Ï  [LATIN CAPITAL LETTER I WITH DIAERESIS]
+"\u00CF" => "I"
+
+# Ĩ  [LATIN CAPITAL LETTER I WITH TILDE]
+"\u0128" => "I"
+
+# Ī  [LATIN CAPITAL LETTER I WITH MACRON]
+"\u012A" => "I"
+
+# Ĭ  [LATIN CAPITAL LETTER I WITH BREVE]
+"\u012C" => "I"
+
+# Į  [LATIN CAPITAL LETTER I WITH OGONEK]
+"\u012E" => "I"
+
+# İ  [LATIN CAPITAL LETTER I WITH DOT ABOVE]
+"\u0130" => "I"
+
+# Ɩ  [LATIN CAPITAL LETTER IOTA]
+"\u0196" => "I"
+
+# Ɨ  [LATIN CAPITAL LETTER I WITH STROKE]
+"\u0197" => "I"
+
+# Ǐ  [LATIN CAPITAL LETTER I WITH CARON]
+"\u01CF" => "I"
+
+# Ȉ  [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE]
+"\u0208" => "I"
+
+# Ȋ  [LATIN CAPITAL LETTER I WITH INVERTED BREVE]
+"\u020A" => "I"
+
+# ɪ  [LATIN LETTER SMALL CAPITAL I]
+"\u026A" => "I"
+
+# ᵻ  [LATIN SMALL CAPITAL LETTER I WITH STROKE]
+"\u1D7B" => "I"
+
+# Ḭ  [LATIN CAPITAL LETTER I WITH TILDE BELOW]
+"\u1E2C" => "I"
+
+# Ḯ  [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE]
+"\u1E2E" => "I"
+
+# Ỉ  [LATIN CAPITAL LETTER I WITH HOOK ABOVE]
+"\u1EC8" => "I"
+
+# Ị  [LATIN CAPITAL LETTER I WITH DOT BELOW]
+"\u1ECA" => "I"
+
+# Ⓘ  [CIRCLED LATIN CAPITAL LETTER I]
+"\u24BE" => "I"
+
+# ꟾ  [LATIN EPIGRAPHIC LETTER I LONGA]
+"\uA7FE" => "I"
+
+# Ｉ  [FULLWIDTH LATIN CAPITAL LETTER I]
+"\uFF29" => "I"
+
+# ì  [LATIN SMALL LETTER I WITH GRAVE]
+"\u00EC" => "i"
+
+# í  [LATIN SMALL LETTER I WITH ACUTE]
+"\u00ED" => "i"
+
+# î  [LATIN SMALL LETTER I WITH CIRCUMFLEX]
+"\u00EE" => "i"
+
+# ï  [LATIN SMALL LETTER I WITH DIAERESIS]
+"\u00EF" => "i"
+
+# ĩ  [LATIN SMALL LETTER I WITH TILDE]
+"\u0129" => "i"
+
+# ī  [LATIN SMALL LETTER I WITH MACRON]
+"\u012B" => "i"
+
+# ĭ  [LATIN SMALL LETTER I WITH BREVE]
+"\u012D" => "i"
+
+# į  [LATIN SMALL LETTER I WITH OGONEK]
+"\u012F" => "i"
+
+# ı  [LATIN SMALL LETTER DOTLESS I]
+"\u0131" => "i"
+
+# ǐ  [LATIN SMALL LETTER I WITH CARON]
+"\u01D0" => "i"
+
+# ȉ  [LATIN SMALL LETTER I WITH DOUBLE GRAVE]
+"\u0209" => "i"
+
+# ȋ  [LATIN SMALL LETTER I WITH INVERTED BREVE]
+"\u020B" => "i"
+
+# ɨ  [LATIN SMALL LETTER I WITH STROKE]
+"\u0268" => "i"
+
+# ᴉ  [LATIN SMALL LETTER TURNED I]
+"\u1D09" => "i"
+
+# ᵢ  [LATIN SUBSCRIPT SMALL LETTER I]
+"\u1D62" => "i"
+
+# ᵼ  [LATIN SMALL LETTER IOTA WITH STROKE]
+"\u1D7C" => "i"
+
+# ᶖ  [LATIN SMALL LETTER I WITH RETROFLEX HOOK]
+"\u1D96" => "i"
+
+# ḭ  [LATIN SMALL LETTER I WITH TILDE BELOW]
+"\u1E2D" => "i"
+
+# ḯ  [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE]
+"\u1E2F" => "i"
+
+# ỉ  [LATIN SMALL LETTER I WITH HOOK ABOVE]
+"\u1EC9" => "i"
+
+# ị  [LATIN SMALL LETTER I WITH DOT BELOW]
+"\u1ECB" => "i"
+
+# ⁱ  [SUPERSCRIPT LATIN SMALL LETTER I]
+"\u2071" => "i"
+
+# ⓘ  [CIRCLED LATIN SMALL LETTER I]
+"\u24D8" => "i"
+
+# ｉ  [FULLWIDTH LATIN SMALL LETTER I]
+"\uFF49" => "i"
+
+# Ĳ  [LATIN CAPITAL LIGATURE IJ]
+"\u0132" => "IJ"
+
+# ⒤  [PARENTHESIZED LATIN SMALL LETTER I]
+"\u24A4" => "(i)"
+
+# ĳ  [LATIN SMALL LIGATURE IJ]
+"\u0133" => "ij"
+
+# Ĵ  [LATIN CAPITAL LETTER J WITH CIRCUMFLEX]
+"\u0134" => "J"
+
+# Ɉ  [LATIN CAPITAL LETTER J WITH STROKE]
+"\u0248" => "J"
+
+# ᴊ  [LATIN LETTER SMALL CAPITAL J]
+"\u1D0A" => "J"
+
+# Ⓙ  [CIRCLED LATIN CAPITAL LETTER J]
+"\u24BF" => "J"
+
+# Ｊ  [FULLWIDTH LATIN CAPITAL LETTER J]
+"\uFF2A" => "J"
+
+# ĵ  [LATIN SMALL LETTER J WITH CIRCUMFLEX]
+"\u0135" => "j"
+
+# ǰ  [LATIN SMALL LETTER J WITH CARON]
+"\u01F0" => "j"
+
+# ȷ  [LATIN SMALL LETTER DOTLESS J]
+"\u0237" => "j"
+
+# ɉ  [LATIN SMALL LETTER J WITH STROKE]
+"\u0249" => "j"
+
+# ɟ  [LATIN SMALL LETTER DOTLESS J WITH STROKE]
+"\u025F" => "j"
+
+# ʄ  [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK]
+"\u0284" => "j"
+
+# ʝ  [LATIN SMALL LETTER J WITH CROSSED-TAIL]
+"\u029D" => "j"
+
+# ⓙ  [CIRCLED LATIN SMALL LETTER J]
+"\u24D9" => "j"
+
+# ⱼ  [LATIN SUBSCRIPT SMALL LETTER J]
+"\u2C7C" => "j"
+
+# ｊ  [FULLWIDTH LATIN SMALL LETTER J]
+"\uFF4A" => "j"
+
+# ⒥  [PARENTHESIZED LATIN SMALL LETTER J]
+"\u24A5" => "(j)"
+
+# Ķ  [LATIN CAPITAL LETTER K WITH CEDILLA]
+"\u0136" => "K"
+
+# Ƙ  [LATIN CAPITAL LETTER K WITH HOOK]
+"\u0198" => "K"
+
+# Ǩ  [LATIN CAPITAL LETTER K WITH CARON]
+"\u01E8" => "K"
+
+# ᴋ  [LATIN LETTER SMALL CAPITAL K]
+"\u1D0B" => "K"
+
+# Ḱ  [LATIN CAPITAL LETTER K WITH ACUTE]
+"\u1E30" => "K"
+
+# Ḳ  [LATIN CAPITAL LETTER K WITH DOT BELOW]
+"\u1E32" => "K"
+
+# Ḵ  [LATIN CAPITAL LETTER K WITH LINE BELOW]
+"\u1E34" => "K"
+
+# Ⓚ  [CIRCLED LATIN CAPITAL LETTER K]
+"\u24C0" => "K"
+
+# Ⱪ  [LATIN CAPITAL LETTER K WITH DESCENDER]
+"\u2C69" => "K"
+
+# Ꝁ  [LATIN CAPITAL LETTER K WITH STROKE]
+"\uA740" => "K"
+
+# Ꝃ  [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE]
+"\uA742" => "K"
+
+# Ꝅ  [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE]
+"\uA744" => "K"
+
+# Ｋ  [FULLWIDTH LATIN CAPITAL LETTER K]
+"\uFF2B" => "K"
+
+# ķ  [LATIN SMALL LETTER K WITH CEDILLA]
+"\u0137" => "k"
+
+# ƙ  [LATIN SMALL LETTER K WITH HOOK]
+"\u0199" => "k"
+
+# ǩ  [LATIN SMALL LETTER K WITH CARON]
+"\u01E9" => "k"
+
+# ʞ  [LATIN SMALL LETTER TURNED K]
+"\u029E" => "k"
+
+# ᶄ  [LATIN SMALL LETTER K WITH PALATAL HOOK]
+"\u1D84" => "k"
+
+# ḱ  [LATIN SMALL LETTER K WITH ACUTE]
+"\u1E31" => "k"
+
+# ḳ  [LATIN SMALL LETTER K WITH DOT BELOW]
+"\u1E33" => "k"
+
+# ḵ  [LATIN SMALL LETTER K WITH LINE BELOW]
+"\u1E35" => "k"
+
+# ⓚ  [CIRCLED LATIN SMALL LETTER K]
+"\u24DA" => "k"
+
+# ⱪ  [LATIN SMALL LETTER K WITH DESCENDER]
+"\u2C6A" => "k"
+
+# ꝁ  [LATIN SMALL LETTER K WITH STROKE]
+"\uA741" => "k"
+
+# ꝃ  [LATIN SMALL LETTER K WITH DIAGONAL STROKE]
+"\uA743" => "k"
+
+# ꝅ  [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE]
+"\uA745" => "k"
+
+# ｋ  [FULLWIDTH LATIN SMALL LETTER K]
+"\uFF4B" => "k"
+
+# ⒦  [PARENTHESIZED LATIN SMALL LETTER K]
+"\u24A6" => "(k)"
+
+# Ĺ  [LATIN CAPITAL LETTER L WITH ACUTE]
+"\u0139" => "L"
+
+# Ļ  [LATIN CAPITAL LETTER L WITH CEDILLA]
+"\u013B" => "L"
+
+# Ľ  [LATIN CAPITAL LETTER L WITH CARON]
+"\u013D" => "L"
+
+# Ŀ  [LATIN CAPITAL LETTER L WITH MIDDLE DOT]
+"\u013F" => "L"
+
+# Ł  [LATIN CAPITAL LETTER L WITH STROKE]
+"\u0141" => "L"
+
+# Ƚ  [LATIN CAPITAL LETTER L WITH BAR]
+"\u023D" => "L"
+
+# ʟ  [LATIN LETTER SMALL CAPITAL L]
+"\u029F" => "L"
+
+# ᴌ  [LATIN LETTER SMALL CAPITAL L WITH STROKE]
+"\u1D0C" => "L"
+
+# Ḷ  [LATIN CAPITAL LETTER L WITH DOT BELOW]
+"\u1E36" => "L"
+
+# Ḹ  [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON]
+"\u1E38" => "L"
+
+# Ḻ  [LATIN CAPITAL LETTER L WITH LINE BELOW]
+"\u1E3A" => "L"
+
+# Ḽ  [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW]
+"\u1E3C" => "L"
+
+# Ⓛ  [CIRCLED LATIN CAPITAL LETTER L]
+"\u24C1" => "L"
+
+# Ⱡ  [LATIN CAPITAL LETTER L WITH DOUBLE BAR]
+"\u2C60" => "L"
+
+# Ɫ  [LATIN CAPITAL LETTER L WITH MIDDLE TILDE]
+"\u2C62" => "L"
+
+# Ꝇ  [LATIN CAPITAL LETTER BROKEN L]
+"\uA746" => "L"
+
+# Ꝉ  [LATIN CAPITAL LETTER L WITH HIGH STROKE]
+"\uA748" => "L"
+
+# Ꞁ  [LATIN CAPITAL LETTER TURNED L]
+"\uA780" => "L"
+
+# Ｌ  [FULLWIDTH LATIN CAPITAL LETTER L]
+"\uFF2C" => "L"
+
+# ĺ  [LATIN SMALL LETTER L WITH ACUTE]
+"\u013A" => "l"
+
+# ļ  [LATIN SMALL LETTER L WITH CEDILLA]
+"\u013C" => "l"
+
+# ľ  [LATIN SMALL LETTER L WITH CARON]
+"\u013E" => "l"
+
+# ŀ  [LATIN SMALL LETTER L WITH MIDDLE DOT]
+"\u0140" => "l"
+
+# ł  [LATIN SMALL LETTER L WITH STROKE]
+"\u0142" => "l"
+
+# ƚ  [LATIN SMALL LETTER L WITH BAR]
+"\u019A" => "l"
+
+# ȴ  [LATIN SMALL LETTER L WITH CURL]
+"\u0234" => "l"
+
+# ɫ  [LATIN SMALL LETTER L WITH MIDDLE TILDE]
+"\u026B" => "l"
+
+# ɬ  [LATIN SMALL LETTER L WITH BELT]
+"\u026C" => "l"
+
+# ɭ  [LATIN SMALL LETTER L WITH RETROFLEX HOOK]
+"\u026D" => "l"
+
+# ᶅ  [LATIN SMALL LETTER L WITH PALATAL HOOK]
+"\u1D85" => "l"
+
+# ḷ  [LATIN SMALL LETTER L WITH DOT BELOW]
+"\u1E37" => "l"
+
+# ḹ  [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON]
+"\u1E39" => "l"
+
+# ḻ  [LATIN SMALL LETTER L WITH LINE BELOW]
+"\u1E3B" => "l"
+
+# ḽ  [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW]
+"\u1E3D" => "l"
+
+# ⓛ  [CIRCLED LATIN SMALL LETTER L]
+"\u24DB" => "l"
+
+# ⱡ  [LATIN SMALL LETTER L WITH DOUBLE BAR]
+"\u2C61" => "l"
+
+# ꝇ  [LATIN SMALL LETTER BROKEN L]
+"\uA747" => "l"
+
+# ꝉ  [LATIN SMALL LETTER L WITH HIGH STROKE]
+"\uA749" => "l"
+
+# ꞁ  [LATIN SMALL LETTER TURNED L]
+"\uA781" => "l"
+
+# ｌ  [FULLWIDTH LATIN SMALL LETTER L]
+"\uFF4C" => "l"
+
+# Ǉ  [LATIN CAPITAL LETTER LJ]
+"\u01C7" => "LJ"
+
+# Ỻ  [LATIN CAPITAL LETTER MIDDLE-WELSH LL]
+"\u1EFA" => "LL"
+
+# ǈ  [LATIN CAPITAL LETTER L WITH SMALL LETTER J]
+"\u01C8" => "Lj"
+
+# ⒧  [PARENTHESIZED LATIN SMALL LETTER L]
+"\u24A7" => "(l)"
+
+# ǉ  [LATIN SMALL LETTER LJ]
+"\u01C9" => "lj"
+
+# ỻ  [LATIN SMALL LETTER MIDDLE-WELSH LL]
+"\u1EFB" => "ll"
+
+# ʪ  [LATIN SMALL LETTER LS DIGRAPH]
+"\u02AA" => "ls"
+
+# ʫ  [LATIN SMALL LETTER LZ DIGRAPH]
+"\u02AB" => "lz"
+
+# Ɯ  [LATIN CAPITAL LETTER TURNED M]
+"\u019C" => "M"
+
+# ᴍ  [LATIN LETTER SMALL CAPITAL M]
+"\u1D0D" => "M"
+
+# Ḿ  [LATIN CAPITAL LETTER M WITH ACUTE]
+"\u1E3E" => "M"
+
+# Ṁ  [LATIN CAPITAL LETTER M WITH DOT ABOVE]
+"\u1E40" => "M"
+
+# Ṃ  [LATIN CAPITAL LETTER M WITH DOT BELOW]
+"\u1E42" => "M"
+
+# Ⓜ  [CIRCLED LATIN CAPITAL LETTER M]
+"\u24C2" => "M"
+
+# Ɱ  [LATIN CAPITAL LETTER M WITH HOOK]
+"\u2C6E" => "M"
+
+# ꟽ  [LATIN EPIGRAPHIC LETTER INVERTED M]
+"\uA7FD" => "M"
+
+# ꟿ  [LATIN EPIGRAPHIC LETTER ARCHAIC M]
+"\uA7FF" => "M"
+
+# Ｍ  [FULLWIDTH LATIN CAPITAL LETTER M]
+"\uFF2D" => "M"
+
+# ɯ  [LATIN SMALL LETTER TURNED M]
+"\u026F" => "m"
+
+# ɰ  [LATIN SMALL LETTER TURNED M WITH LONG LEG]
+"\u0270" => "m"
+
+# ɱ  [LATIN SMALL LETTER M WITH HOOK]
+"\u0271" => "m"
+
+# ᵯ  [LATIN SMALL LETTER M WITH MIDDLE TILDE]
+"\u1D6F" => "m"
+
+# ᶆ  [LATIN SMALL LETTER M WITH PALATAL HOOK]
+"\u1D86" => "m"
+
+# ḿ  [LATIN SMALL LETTER M WITH ACUTE]
+"\u1E3F" => "m"
+
+# ṁ  [LATIN SMALL LETTER M WITH DOT ABOVE]
+"\u1E41" => "m"
+
+# ṃ  [LATIN SMALL LETTER M WITH DOT BELOW]
+"\u1E43" => "m"
+
+# ⓜ  [CIRCLED LATIN SMALL LETTER M]
+"\u24DC" => "m"
+
+# ｍ  [FULLWIDTH LATIN SMALL LETTER M]
+"\uFF4D" => "m"
+
+# ⒨  [PARENTHESIZED LATIN SMALL LETTER M]
+"\u24A8" => "(m)"
+
+# Ñ  [LATIN CAPITAL LETTER N WITH TILDE]
+"\u00D1" => "N"
+
+# Ń  [LATIN CAPITAL LETTER N WITH ACUTE]
+"\u0143" => "N"
+
+# Ņ  [LATIN CAPITAL LETTER N WITH CEDILLA]
+"\u0145" => "N"
+
+# Ň  [LATIN CAPITAL LETTER N WITH CARON]
+"\u0147" => "N"
+
+# Ŋ  http://en.wikipedia.org/wiki/Eng_(letter)  [LATIN CAPITAL LETTER ENG]
+"\u014A" => "N"
+
+# Ɲ  [LATIN CAPITAL LETTER N WITH LEFT HOOK]
+"\u019D" => "N"
+
+# Ǹ  [LATIN CAPITAL LETTER N WITH GRAVE]
+"\u01F8" => "N"
+
+# Ƞ  [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG]
+"\u0220" => "N"
+
+# ɴ  [LATIN LETTER SMALL CAPITAL N]
+"\u0274" => "N"
+
+# ᴎ  [LATIN LETTER SMALL CAPITAL REVERSED N]
+"\u1D0E" => "N"
+
+# Ṅ  [LATIN CAPITAL LETTER N WITH DOT ABOVE]
+"\u1E44" => "N"
+
+# Ṇ  [LATIN CAPITAL LETTER N WITH DOT BELOW]
+"\u1E46" => "N"
+
+# Ṉ  [LATIN CAPITAL LETTER N WITH LINE BELOW]
+"\u1E48" => "N"
+
+# Ṋ  [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW]
+"\u1E4A" => "N"
+
+# Ⓝ  [CIRCLED LATIN CAPITAL LETTER N]
+"\u24C3" => "N"
+
+# Ｎ  [FULLWIDTH LATIN CAPITAL LETTER N]
+"\uFF2E" => "N"
+
+# ñ  [LATIN SMALL LETTER N WITH TILDE]
+"\u00F1" => "n"
+
+# ń  [LATIN SMALL LETTER N WITH ACUTE]
+"\u0144" => "n"
+
+# ņ  [LATIN SMALL LETTER N WITH CEDILLA]
+"\u0146" => "n"
+
+# ň  [LATIN SMALL LETTER N WITH CARON]
+"\u0148" => "n"
+
+# ŉ  [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE]
+"\u0149" => "n"
+
+# ŋ  http://en.wikipedia.org/wiki/Eng_(letter)  [LATIN SMALL LETTER ENG]
+"\u014B" => "n"
+
+# ƞ  [LATIN SMALL LETTER N WITH LONG RIGHT LEG]
+"\u019E" => "n"
+
+# ǹ  [LATIN SMALL LETTER N WITH GRAVE]
+"\u01F9" => "n"
+
+# ȵ  [LATIN SMALL LETTER N WITH CURL]
+"\u0235" => "n"
+
+# ɲ  [LATIN SMALL LETTER N WITH LEFT HOOK]
+"\u0272" => "n"
+
+# ɳ  [LATIN SMALL LETTER N WITH RETROFLEX HOOK]
+"\u0273" => "n"
+
+# ᵰ  [LATIN SMALL LETTER N WITH MIDDLE TILDE]
+"\u1D70" => "n"
+
+# ᶇ  [LATIN SMALL LETTER N WITH PALATAL HOOK]
+"\u1D87" => "n"
+
+# ṅ  [LATIN SMALL LETTER N WITH DOT ABOVE]
+"\u1E45" => "n"
+
+# ṇ  [LATIN SMALL LETTER N WITH DOT BELOW]
+"\u1E47" => "n"
+
+# ṉ  [LATIN SMALL LETTER N WITH LINE BELOW]
+"\u1E49" => "n"
+
+# ṋ  [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW]
+"\u1E4B" => "n"
+
+# ⁿ  [SUPERSCRIPT LATIN SMALL LETTER N]
+"\u207F" => "n"
+
+# ⓝ  [CIRCLED LATIN SMALL LETTER N]
+"\u24DD" => "n"
+
+# ｎ  [FULLWIDTH LATIN SMALL LETTER N]
+"\uFF4E" => "n"
+
+# Ǌ  [LATIN CAPITAL LETTER NJ]
+"\u01CA" => "NJ"
+
+# ǋ  [LATIN CAPITAL LETTER N WITH SMALL LETTER J]
+"\u01CB" => "Nj"
+
+# ⒩  [PARENTHESIZED LATIN SMALL LETTER N]
+"\u24A9" => "(n)"
+
+# ǌ  [LATIN SMALL LETTER NJ]
+"\u01CC" => "nj"
+
+# Ò  [LATIN CAPITAL LETTER O WITH GRAVE]
+"\u00D2" => "O"
+
+# Ó  [LATIN CAPITAL LETTER O WITH ACUTE]
+"\u00D3" => "O"
+
+# Ô  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX]
+"\u00D4" => "O"
+
+# Õ  [LATIN CAPITAL LETTER O WITH TILDE]
+"\u00D5" => "O"
+
+# Ö  [LATIN CAPITAL LETTER O WITH DIAERESIS]
+"\u00D6" => "O"
+
+# Ø  [LATIN CAPITAL LETTER O WITH STROKE]
+"\u00D8" => "O"
+
+# Ō  [LATIN CAPITAL LETTER O WITH MACRON]
+"\u014C" => "O"
+
+# Ŏ  [LATIN CAPITAL LETTER O WITH BREVE]
+"\u014E" => "O"
+
+# Ő  [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE]
+"\u0150" => "O"
+
+# Ɔ  [LATIN CAPITAL LETTER OPEN O]
+"\u0186" => "O"
+
+# Ɵ  [LATIN CAPITAL LETTER O WITH MIDDLE TILDE]
+"\u019F" => "O"
+
+# Ơ  [LATIN CAPITAL LETTER O WITH HORN]
+"\u01A0" => "O"
+
+# Ǒ  [LATIN CAPITAL LETTER O WITH CARON]
+"\u01D1" => "O"
+
+# Ǫ  [LATIN CAPITAL LETTER O WITH OGONEK]
+"\u01EA" => "O"
+
+# Ǭ  [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON]
+"\u01EC" => "O"
+
+# Ǿ  [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE]
+"\u01FE" => "O"
+
+# Ȍ  [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE]
+"\u020C" => "O"
+
+# Ȏ  [LATIN CAPITAL LETTER O WITH INVERTED BREVE]
+"\u020E" => "O"
+
+# Ȫ  [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON]
+"\u022A" => "O"
+
+# Ȭ  [LATIN CAPITAL LETTER O WITH TILDE AND MACRON]
+"\u022C" => "O"
+
+# Ȯ  [LATIN CAPITAL LETTER O WITH DOT ABOVE]
+"\u022E" => "O"
+
+# Ȱ  [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON]
+"\u0230" => "O"
+
+# ᴏ  [LATIN LETTER SMALL CAPITAL O]
+"\u1D0F" => "O"
+
+# ᴐ  [LATIN LETTER SMALL CAPITAL OPEN O]
+"\u1D10" => "O"
+
+# Ṍ  [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE]
+"\u1E4C" => "O"
+
+# Ṏ  [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS]
+"\u1E4E" => "O"
+
+# Ṑ  [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE]
+"\u1E50" => "O"
+
+# Ṓ  [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE]
+"\u1E52" => "O"
+
+# Ọ  [LATIN CAPITAL LETTER O WITH DOT BELOW]
+"\u1ECC" => "O"
+
+# Ỏ  [LATIN CAPITAL LETTER O WITH HOOK ABOVE]
+"\u1ECE" => "O"
+
+# Ố  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE]
+"\u1ED0" => "O"
+
+# Ồ  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE]
+"\u1ED2" => "O"
+
+# Ổ  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
+"\u1ED4" => "O"
+
+# Ỗ  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE]
+"\u1ED6" => "O"
+
+# Ộ  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
+"\u1ED8" => "O"
+
+# Ớ  [LATIN CAPITAL LETTER O WITH HORN AND ACUTE]
+"\u1EDA" => "O"
+
+# Ờ  [LATIN CAPITAL LETTER O WITH HORN AND GRAVE]
+"\u1EDC" => "O"
+
+# Ở  [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE]
+"\u1EDE" => "O"
+
+# Ỡ  [LATIN CAPITAL LETTER O WITH HORN AND TILDE]
+"\u1EE0" => "O"
+
+# Ợ  [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW]
+"\u1EE2" => "O"
+
+# Ⓞ  [CIRCLED LATIN CAPITAL LETTER O]
+"\u24C4" => "O"
+
+# Ꝋ  [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY]
+"\uA74A" => "O"
+
+# Ꝍ  [LATIN CAPITAL LETTER O WITH LOOP]
+"\uA74C" => "O"
+
+# Ｏ  [FULLWIDTH LATIN CAPITAL LETTER O]
+"\uFF2F" => "O"
+
+# ò  [LATIN SMALL LETTER O WITH GRAVE]
+"\u00F2" => "o"
+
+# ó  [LATIN SMALL LETTER O WITH ACUTE]
+"\u00F3" => "o"
+
+# ô  [LATIN SMALL LETTER O WITH CIRCUMFLEX]
+"\u00F4" => "o"
+
+# õ  [LATIN SMALL LETTER O WITH TILDE]
+"\u00F5" => "o"
+
+# ö  [LATIN SMALL LETTER O WITH DIAERESIS]
+"\u00F6" => "o"
+
+# ø  [LATIN SMALL LETTER O WITH STROKE]
+"\u00F8" => "o"
+
+# ō  [LATIN SMALL LETTER O WITH MACRON]
+"\u014D" => "o"
+
+# ŏ  [LATIN SMALL LETTER O WITH BREVE]
+"\u014F" => "o"
+
+# ő  [LATIN SMALL LETTER O WITH DOUBLE ACUTE]
+"\u0151" => "o"
+
+# ơ  [LATIN SMALL LETTER O WITH HORN]
+"\u01A1" => "o"
+
+# ǒ  [LATIN SMALL LETTER O WITH CARON]
+"\u01D2" => "o"
+
+# ǫ  [LATIN SMALL LETTER O WITH OGONEK]
+"\u01EB" => "o"
+
+# ǭ  [LATIN SMALL LETTER O WITH OGONEK AND MACRON]
+"\u01ED" => "o"
+
+# ǿ  [LATIN SMALL LETTER O WITH STROKE AND ACUTE]
+"\u01FF" => "o"
+
+# ȍ  [LATIN SMALL LETTER O WITH DOUBLE GRAVE]
+"\u020D" => "o"
+
+# ȏ  [LATIN SMALL LETTER O WITH INVERTED BREVE]
+"\u020F" => "o"
+
+# ȫ  [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON]
+"\u022B" => "o"
+
+# ȭ  [LATIN SMALL LETTER O WITH TILDE AND MACRON]
+"\u022D" => "o"
+
+# ȯ  [LATIN SMALL LETTER O WITH DOT ABOVE]
+"\u022F" => "o"
+
+# ȱ  [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON]
+"\u0231" => "o"
+
+# ɔ  [LATIN SMALL LETTER OPEN O]
+"\u0254" => "o"
+
+# ɵ  [LATIN SMALL LETTER BARRED O]
+"\u0275" => "o"
+
+# ᴖ  [LATIN SMALL LETTER TOP HALF O]
+"\u1D16" => "o"
+
+# ᴗ  [LATIN SMALL LETTER BOTTOM HALF O]
+"\u1D17" => "o"
+
+# ᶗ  [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK]
+"\u1D97" => "o"
+
+# ṍ  [LATIN SMALL LETTER O WITH TILDE AND ACUTE]
+"\u1E4D" => "o"
+
+# ṏ  [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS]
+"\u1E4F" => "o"
+
+# ṑ  [LATIN SMALL LETTER O WITH MACRON AND GRAVE]
+"\u1E51" => "o"
+
+# ṓ  [LATIN SMALL LETTER O WITH MACRON AND ACUTE]
+"\u1E53" => "o"
+
+# ọ  [LATIN SMALL LETTER O WITH DOT BELOW]
+"\u1ECD" => "o"
+
+# ỏ  [LATIN SMALL LETTER O WITH HOOK ABOVE]
+"\u1ECF" => "o"
+
+# ố  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE]
+"\u1ED1" => "o"
+
+# ồ  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE]
+"\u1ED3" => "o"
+
+# ổ  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
+"\u1ED5" => "o"
+
+# ỗ  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE]
+"\u1ED7" => "o"
+
+# ộ  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
+"\u1ED9" => "o"
+
+# ớ  [LATIN SMALL LETTER O WITH HORN AND ACUTE]
+"\u1EDB" => "o"
+
+# ờ  [LATIN SMALL LETTER O WITH HORN AND GRAVE]
+"\u1EDD" => "o"
+
+# ở  [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE]
+"\u1EDF" => "o"
+
+# ỡ  [LATIN SMALL LETTER O WITH HORN AND TILDE]
+"\u1EE1" => "o"
+
+# ợ  [LATIN SMALL LETTER O WITH HORN AND DOT BELOW]
+"\u1EE3" => "o"
+
+# ₒ  [LATIN SUBSCRIPT SMALL LETTER O]
+"\u2092" => "o"
+
+# ⓞ  [CIRCLED LATIN SMALL LETTER O]
+"\u24DE" => "o"
+
+# ⱺ  [LATIN SMALL LETTER O WITH LOW RING INSIDE]
+"\u2C7A" => "o"
+
+# ꝋ  [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY]
+"\uA74B" => "o"
+
+# ꝍ  [LATIN SMALL LETTER O WITH LOOP]
+"\uA74D" => "o"
+
+# ｏ  [FULLWIDTH LATIN SMALL LETTER O]
+"\uFF4F" => "o"
+
+# Œ  [LATIN CAPITAL LIGATURE OE]
+"\u0152" => "OE"
+
+# ɶ  [LATIN LETTER SMALL CAPITAL OE]
+"\u0276" => "OE"
+
+# Ꝏ  [LATIN CAPITAL LETTER OO]
+"\uA74E" => "OO"
+
+# Ȣ  http://en.wikipedia.org/wiki/OU  [LATIN CAPITAL LETTER OU]
+"\u0222" => "OU"
+
+# ᴕ  [LATIN LETTER SMALL CAPITAL OU]
+"\u1D15" => "OU"
+
+# ⒪  [PARENTHESIZED LATIN SMALL LETTER O]
+"\u24AA" => "(o)"
+
+# œ  [LATIN SMALL LIGATURE OE]
+"\u0153" => "oe"
+
+# ᴔ  [LATIN SMALL LETTER TURNED OE]
+"\u1D14" => "oe"
+
+# ꝏ  [LATIN SMALL LETTER OO]
+"\uA74F" => "oo"
+
+# ȣ  http://en.wikipedia.org/wiki/OU  [LATIN SMALL LETTER OU]
+"\u0223" => "ou"
+
+# Ƥ  [LATIN CAPITAL LETTER P WITH HOOK]
+"\u01A4" => "P"
+
+# ᴘ  [LATIN LETTER SMALL CAPITAL P]
+"\u1D18" => "P"
+
+# Ṕ  [LATIN CAPITAL LETTER P WITH ACUTE]
+"\u1E54" => "P"
+
+# Ṗ  [LATIN CAPITAL LETTER P WITH DOT ABOVE]
+"\u1E56" => "P"
+
+# Ⓟ  [CIRCLED LATIN CAPITAL LETTER P]
+"\u24C5" => "P"
+
+# Ᵽ  [LATIN CAPITAL LETTER P WITH STROKE]
+"\u2C63" => "P"
+
+# Ꝑ  [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER]
+"\uA750" => "P"
+
+# Ꝓ  [LATIN CAPITAL LETTER P WITH FLOURISH]
+"\uA752" => "P"
+
+# Ꝕ  [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL]
+"\uA754" => "P"
+
+# Ｐ  [FULLWIDTH LATIN CAPITAL LETTER P]
+"\uFF30" => "P"
+
+# ƥ  [LATIN SMALL LETTER P WITH HOOK]
+"\u01A5" => "p"
+
+# ᵱ  [LATIN SMALL LETTER P WITH MIDDLE TILDE]
+"\u1D71" => "p"
+
+# ᵽ  [LATIN SMALL LETTER P WITH STROKE]
+"\u1D7D" => "p"
+
+# ᶈ  [LATIN SMALL LETTER P WITH PALATAL HOOK]
+"\u1D88" => "p"
+
+# ṕ  [LATIN SMALL LETTER P WITH ACUTE]
+"\u1E55" => "p"
+
+# ṗ  [LATIN SMALL LETTER P WITH DOT ABOVE]
+"\u1E57" => "p"
+
+# ⓟ  [CIRCLED LATIN SMALL LETTER P]
+"\u24DF" => "p"
+
+# ꝑ  [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER]
+"\uA751" => "p"
+
+# ꝓ  [LATIN SMALL LETTER P WITH FLOURISH]
+"\uA753" => "p"
+
+# ꝕ  [LATIN SMALL LETTER P WITH SQUIRREL TAIL]
+"\uA755" => "p"
+
+# ꟼ  [LATIN EPIGRAPHIC LETTER REVERSED P]
+"\uA7FC" => "p"
+
+# ｐ  [FULLWIDTH LATIN SMALL LETTER P]
+"\uFF50" => "p"
+
+# ⒫  [PARENTHESIZED LATIN SMALL LETTER P]
+"\u24AB" => "(p)"
+
+# Ɋ  [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL]
+"\u024A" => "Q"
+
+# Ⓠ  [CIRCLED LATIN CAPITAL LETTER Q]
+"\u24C6" => "Q"
+
+# Ꝗ  [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER]
+"\uA756" => "Q"
+
+# Ꝙ  [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE]
+"\uA758" => "Q"
+
+# Ｑ  [FULLWIDTH LATIN CAPITAL LETTER Q]
+"\uFF31" => "Q"
+
+# ĸ  http://en.wikipedia.org/wiki/Kra_(letter)  [LATIN SMALL LETTER KRA]
+"\u0138" => "q"
+
+# ɋ  [LATIN SMALL LETTER Q WITH HOOK TAIL]
+"\u024B" => "q"
+
+# ʠ  [LATIN SMALL LETTER Q WITH HOOK]
+"\u02A0" => "q"
+
+# ⓠ  [CIRCLED LATIN SMALL LETTER Q]
+"\u24E0" => "q"
+
+# ꝗ  [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER]
+"\uA757" => "q"
+
+# ꝙ  [LATIN SMALL LETTER Q WITH DIAGONAL STROKE]
+"\uA759" => "q"
+
+# ｑ  [FULLWIDTH LATIN SMALL LETTER Q]
+"\uFF51" => "q"
+
+# ⒬  [PARENTHESIZED LATIN SMALL LETTER Q]
+"\u24AC" => "(q)"
+
+# ȹ  [LATIN SMALL LETTER QP DIGRAPH]
+"\u0239" => "qp"
+
+# Ŕ  [LATIN CAPITAL LETTER R WITH ACUTE]
+"\u0154" => "R"
+
+# Ŗ  [LATIN CAPITAL LETTER R WITH CEDILLA]
+"\u0156" => "R"
+
+# Ř  [LATIN CAPITAL LETTER R WITH CARON]
+"\u0158" => "R"
+
+# Ȑ  [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE]
+"\u0210" => "R"
+
+# Ȓ  [LATIN CAPITAL LETTER R WITH INVERTED BREVE]
+"\u0212" => "R"
+
+# Ɍ  [LATIN CAPITAL LETTER R WITH STROKE]
+"\u024C" => "R"
+
+# ʀ  [LATIN LETTER SMALL CAPITAL R]
+"\u0280" => "R"
+
+# ʁ  [LATIN LETTER SMALL CAPITAL INVERTED R]
+"\u0281" => "R"
+
+# ᴙ  [LATIN LETTER SMALL CAPITAL REVERSED R]
+"\u1D19" => "R"
+
+# ᴚ  [LATIN LETTER SMALL CAPITAL TURNED R]
+"\u1D1A" => "R"
+
+# Ṙ  [LATIN CAPITAL LETTER R WITH DOT ABOVE]
+"\u1E58" => "R"
+
+# Ṛ  [LATIN CAPITAL LETTER R WITH DOT BELOW]
+"\u1E5A" => "R"
+
+# Ṝ  [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON]
+"\u1E5C" => "R"
+
+# Ṟ  [LATIN CAPITAL LETTER R WITH LINE BELOW]
+"\u1E5E" => "R"
+
+# Ⓡ  [CIRCLED LATIN CAPITAL LETTER R]
+"\u24C7" => "R"
+
+# Ɽ  [LATIN CAPITAL LETTER R WITH TAIL]
+"\u2C64" => "R"
+
+# Ꝛ  [LATIN CAPITAL LETTER R ROTUNDA]
+"\uA75A" => "R"
+
+# Ꞃ  [LATIN CAPITAL LETTER INSULAR R]
+"\uA782" => "R"
+
+# Ｒ  [FULLWIDTH LATIN CAPITAL LETTER R]
+"\uFF32" => "R"
+
+# ŕ  [LATIN SMALL LETTER R WITH ACUTE]
+"\u0155" => "r"
+
+# ŗ  [LATIN SMALL LETTER R WITH CEDILLA]
+"\u0157" => "r"
+
+# ř  [LATIN SMALL LETTER R WITH CARON]
+"\u0159" => "r"
+
+# ȑ  [LATIN SMALL LETTER R WITH DOUBLE GRAVE]
+"\u0211" => "r"
+
+# ȓ  [LATIN SMALL LETTER R WITH INVERTED BREVE]
+"\u0213" => "r"
+
+# ɍ  [LATIN SMALL LETTER R WITH STROKE]
+"\u024D" => "r"
+
+# ɼ  [LATIN SMALL LETTER R WITH LONG LEG]
+"\u027C" => "r"
+
+# ɽ  [LATIN SMALL LETTER R WITH TAIL]
+"\u027D" => "r"
+
+# ɾ  [LATIN SMALL LETTER R WITH FISHHOOK]
+"\u027E" => "r"
+
+# ɿ  [LATIN SMALL LETTER REVERSED R WITH FISHHOOK]
+"\u027F" => "r"
+
+# ᵣ  [LATIN SUBSCRIPT SMALL LETTER R]
+"\u1D63" => "r"
+
+# ᵲ  [LATIN SMALL LETTER R WITH MIDDLE TILDE]
+"\u1D72" => "r"
+
+# ᵳ  [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE]
+"\u1D73" => "r"
+
+# ᶉ  [LATIN SMALL LETTER R WITH PALATAL HOOK]
+"\u1D89" => "r"
+
+# ṙ  [LATIN SMALL LETTER R WITH DOT ABOVE]
+"\u1E59" => "r"
+
+# ṛ  [LATIN SMALL LETTER R WITH DOT BELOW]
+"\u1E5B" => "r"
+
+# ṝ  [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON]
+"\u1E5D" => "r"
+
+# ṟ  [LATIN SMALL LETTER R WITH LINE BELOW]
+"\u1E5F" => "r"
+
+# ⓡ  [CIRCLED LATIN SMALL LETTER R]
+"\u24E1" => "r"
+
+# ꝛ  [LATIN SMALL LETTER R ROTUNDA]
+"\uA75B" => "r"
+
+# ꞃ  [LATIN SMALL LETTER INSULAR R]
+"\uA783" => "r"
+
+# ｒ  [FULLWIDTH LATIN SMALL LETTER R]
+"\uFF52" => "r"
+
+# ⒭  [PARENTHESIZED LATIN SMALL LETTER R]
+"\u24AD" => "(r)"
+
+# Ś  [LATIN CAPITAL LETTER S WITH ACUTE]
+"\u015A" => "S"
+
+# Ŝ  [LATIN CAPITAL LETTER S WITH CIRCUMFLEX]
+"\u015C" => "S"
+
+# Ş  [LATIN CAPITAL LETTER S WITH CEDILLA]
+"\u015E" => "S"
+
+# Š  [LATIN CAPITAL LETTER S WITH CARON]
+"\u0160" => "S"
+
+# Ș  [LATIN CAPITAL LETTER S WITH COMMA BELOW]
+"\u0218" => "S"
+
+# Ṡ  [LATIN CAPITAL LETTER S WITH DOT ABOVE]
+"\u1E60" => "S"
+
+# Ṣ  [LATIN CAPITAL LETTER S WITH DOT BELOW]
+"\u1E62" => "S"
+
+# Ṥ  [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE]
+"\u1E64" => "S"
+
+# Ṧ  [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE]
+"\u1E66" => "S"
+
+# Ṩ  [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE]
+"\u1E68" => "S"
+
+# Ⓢ  [CIRCLED LATIN CAPITAL LETTER S]
+"\u24C8" => "S"
+
+# ꜱ  [LATIN LETTER SMALL CAPITAL S]
+"\uA731" => "S"
+
+# ꞅ  [LATIN SMALL LETTER INSULAR S]
+"\uA785" => "S"
+
+# Ｓ  [FULLWIDTH LATIN CAPITAL LETTER S]
+"\uFF33" => "S"
+
+# ś  [LATIN SMALL LETTER S WITH ACUTE]
+"\u015B" => "s"
+
+# ŝ  [LATIN SMALL LETTER S WITH CIRCUMFLEX]
+"\u015D" => "s"
+
+# ş  [LATIN SMALL LETTER S WITH CEDILLA]
+"\u015F" => "s"
+
+# š  [LATIN SMALL LETTER S WITH CARON]
+"\u0161" => "s"
+
+# ſ  http://en.wikipedia.org/wiki/Long_S  [LATIN SMALL LETTER LONG S]
+"\u017F" => "s"
+
+# ș  [LATIN SMALL LETTER S WITH COMMA BELOW]
+"\u0219" => "s"
+
+# ȿ  [LATIN SMALL LETTER S WITH SWASH TAIL]
+"\u023F" => "s"
+
+# ʂ  [LATIN SMALL LETTER S WITH HOOK]
+"\u0282" => "s"
+
+# ᵴ  [LATIN SMALL LETTER S WITH MIDDLE TILDE]
+"\u1D74" => "s"
+
+# ᶊ  [LATIN SMALL LETTER S WITH PALATAL HOOK]
+"\u1D8A" => "s"
+
+# ṡ  [LATIN SMALL LETTER S WITH DOT ABOVE]
+"\u1E61" => "s"
+
+# ṣ  [LATIN SMALL LETTER S WITH DOT BELOW]
+"\u1E63" => "s"
+
+# ṥ  [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE]
+"\u1E65" => "s"
+
+# ṧ  [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE]
+"\u1E67" => "s"
+
+# ṩ  [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE]
+"\u1E69" => "s"
+
+# ẜ  [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE]
+"\u1E9C" => "s"
+
+# ẝ  [LATIN SMALL LETTER LONG S WITH HIGH STROKE]
+"\u1E9D" => "s"
+
+# ⓢ  [CIRCLED LATIN SMALL LETTER S]
+"\u24E2" => "s"
+
+# Ꞅ  [LATIN CAPITAL LETTER INSULAR S]
+"\uA784" => "s"
+
+# ｓ  [FULLWIDTH LATIN SMALL LETTER S]
+"\uFF53" => "s"
+
+# ẞ  [LATIN CAPITAL LETTER SHARP S]
+"\u1E9E" => "SS"
+
+# ⒮  [PARENTHESIZED LATIN SMALL LETTER S]
+"\u24AE" => "(s)"
+
+# ß  [LATIN SMALL LETTER SHARP S]
+"\u00DF" => "ss"
+
+# ﬆ  [LATIN SMALL LIGATURE ST]
+"\uFB06" => "st"
+
+# Ţ  [LATIN CAPITAL LETTER T WITH CEDILLA]
+"\u0162" => "T"
+
+# Ť  [LATIN CAPITAL LETTER T WITH CARON]
+"\u0164" => "T"
+
+# Ŧ  [LATIN CAPITAL LETTER T WITH STROKE]
+"\u0166" => "T"
+
+# Ƭ  [LATIN CAPITAL LETTER T WITH HOOK]
+"\u01AC" => "T"
+
+# Ʈ  [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK]
+"\u01AE" => "T"
+
+# Ț  [LATIN CAPITAL LETTER T WITH COMMA BELOW]
+"\u021A" => "T"
+
+# Ⱦ  [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE]
+"\u023E" => "T"
+
+# ᴛ  [LATIN LETTER SMALL CAPITAL T]
+"\u1D1B" => "T"
+
+# Ṫ  [LATIN CAPITAL LETTER T WITH DOT ABOVE]
+"\u1E6A" => "T"
+
+# Ṭ  [LATIN CAPITAL LETTER T WITH DOT BELOW]
+"\u1E6C" => "T"
+
+# Ṯ  [LATIN CAPITAL LETTER T WITH LINE BELOW]
+"\u1E6E" => "T"
+
+# Ṱ  [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW]
+"\u1E70" => "T"
+
+# Ⓣ  [CIRCLED LATIN CAPITAL LETTER T]
+"\u24C9" => "T"
+
+# Ꞇ  [LATIN CAPITAL LETTER INSULAR T]
+"\uA786" => "T"
+
+# Ｔ  [FULLWIDTH LATIN CAPITAL LETTER T]
+"\uFF34" => "T"
+
+# ţ  [LATIN SMALL LETTER T WITH CEDILLA]
+"\u0163" => "t"
+
+# ť  [LATIN SMALL LETTER T WITH CARON]
+"\u0165" => "t"
+
+# ŧ  [LATIN SMALL LETTER T WITH STROKE]
+"\u0167" => "t"
+
+# ƫ  [LATIN SMALL LETTER T WITH PALATAL HOOK]
+"\u01AB" => "t"
+
+# ƭ  [LATIN SMALL LETTER T WITH HOOK]
+"\u01AD" => "t"
+
+# ț  [LATIN SMALL LETTER T WITH COMMA BELOW]
+"\u021B" => "t"
+
+# ȶ  [LATIN SMALL LETTER T WITH CURL]
+"\u0236" => "t"
+
+# ʇ  [LATIN SMALL LETTER TURNED T]
+"\u0287" => "t"
+
+# ʈ  [LATIN SMALL LETTER T WITH RETROFLEX HOOK]
+"\u0288" => "t"
+
+# ᵵ  [LATIN SMALL LETTER T WITH MIDDLE TILDE]
+"\u1D75" => "t"
+
+# ṫ  [LATIN SMALL LETTER T WITH DOT ABOVE]
+"\u1E6B" => "t"
+
+# ṭ  [LATIN SMALL LETTER T WITH DOT BELOW]
+"\u1E6D" => "t"
+
+# ṯ  [LATIN SMALL LETTER T WITH LINE BELOW]
+"\u1E6F" => "t"
+
+# ṱ  [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW]
+"\u1E71" => "t"
+
+# ẗ  [LATIN SMALL LETTER T WITH DIAERESIS]
+"\u1E97" => "t"
+
+# ⓣ  [CIRCLED LATIN SMALL LETTER T]
+"\u24E3" => "t"
+
+# ⱦ  [LATIN SMALL LETTER T WITH DIAGONAL STROKE]
+"\u2C66" => "t"
+
+# ｔ  [FULLWIDTH LATIN SMALL LETTER T]
+"\uFF54" => "t"
+
+# Þ  [LATIN CAPITAL LETTER THORN]
+"\u00DE" => "TH"
+
+# Ꝧ  [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER]
+"\uA766" => "TH"
+
+# Ꜩ  [LATIN CAPITAL LETTER TZ]
+"\uA728" => "TZ"
+
+# ⒯  [PARENTHESIZED LATIN SMALL LETTER T]
+"\u24AF" => "(t)"
+
+# ʨ  [LATIN SMALL LETTER TC DIGRAPH WITH CURL]
+"\u02A8" => "tc"
+
+# þ  [LATIN SMALL LETTER THORN]
+"\u00FE" => "th"
+
+# ᵺ  [LATIN SMALL LETTER TH WITH STRIKETHROUGH]
+"\u1D7A" => "th"
+
+# ꝧ  [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER]
+"\uA767" => "th"
+
+# ʦ  [LATIN SMALL LETTER TS DIGRAPH]
+"\u02A6" => "ts"
+
+# ꜩ  [LATIN SMALL LETTER TZ]
+"\uA729" => "tz"
+
+# Ù  [LATIN CAPITAL LETTER U WITH GRAVE]
+"\u00D9" => "U"
+
+# Ú  [LATIN CAPITAL LETTER U WITH ACUTE]
+"\u00DA" => "U"
+
+# Û  [LATIN CAPITAL LETTER U WITH CIRCUMFLEX]
+"\u00DB" => "U"
+
+# Ü  [LATIN CAPITAL LETTER U WITH DIAERESIS]
+"\u00DC" => "U"
+
+# Ũ  [LATIN CAPITAL LETTER U WITH TILDE]
+"\u0168" => "U"
+
+# Ū  [LATIN CAPITAL LETTER U WITH MACRON]
+"\u016A" => "U"
+
+# Ŭ  [LATIN CAPITAL LETTER U WITH BREVE]
+"\u016C" => "U"
+
+# Ů  [LATIN CAPITAL LETTER U WITH RING ABOVE]
+"\u016E" => "U"
+
+# Ű  [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE]
+"\u0170" => "U"
+
+# Ų  [LATIN CAPITAL LETTER U WITH OGONEK]
+"\u0172" => "U"
+
+# Ư  [LATIN CAPITAL LETTER U WITH HORN]
+"\u01AF" => "U"
+
+# Ǔ  [LATIN CAPITAL LETTER U WITH CARON]
+"\u01D3" => "U"
+
+# Ǖ  [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON]
+"\u01D5" => "U"
+
+# Ǘ  [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE]
+"\u01D7" => "U"
+
+# Ǚ  [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON]
+"\u01D9" => "U"
+
+# Ǜ  [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE]
+"\u01DB" => "U"
+
+# Ȕ  [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE]
+"\u0214" => "U"
+
+# Ȗ  [LATIN CAPITAL LETTER U WITH INVERTED BREVE]
+"\u0216" => "U"
+
+# Ʉ  [LATIN CAPITAL LETTER U BAR]
+"\u0244" => "U"
+
+# ᴜ  [LATIN LETTER SMALL CAPITAL U]
+"\u1D1C" => "U"
+
+# ᵾ  [LATIN SMALL CAPITAL LETTER U WITH STROKE]
+"\u1D7E" => "U"
+
+# Ṳ  [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW]
+"\u1E72" => "U"
+
+# Ṵ  [LATIN CAPITAL LETTER U WITH TILDE BELOW]
+"\u1E74" => "U"
+
+# Ṷ  [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW]
+"\u1E76" => "U"
+
+# Ṹ  [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE]
+"\u1E78" => "U"
+
+# Ṻ  [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS]
+"\u1E7A" => "U"
+
+# Ụ  [LATIN CAPITAL LETTER U WITH DOT BELOW]
+"\u1EE4" => "U"
+
+# Ủ  [LATIN CAPITAL LETTER U WITH HOOK ABOVE]
+"\u1EE6" => "U"
+
+# Ứ  [LATIN CAPITAL LETTER U WITH HORN AND ACUTE]
+"\u1EE8" => "U"
+
+# Ừ  [LATIN CAPITAL LETTER U WITH HORN AND GRAVE]
+"\u1EEA" => "U"
+
+# Ử  [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE]
+"\u1EEC" => "U"
+
+# Ữ  [LATIN CAPITAL LETTER U WITH HORN AND TILDE]
+"\u1EEE" => "U"
+
+# Ự  [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW]
+"\u1EF0" => "U"
+
+# Ⓤ  [CIRCLED LATIN CAPITAL LETTER U]
+"\u24CA" => "U"
+
+# Ｕ  [FULLWIDTH LATIN CAPITAL LETTER U]
+"\uFF35" => "U"
+
+# ù  [LATIN SMALL LETTER U WITH GRAVE]
+"\u00F9" => "u"
+
+# ú  [LATIN SMALL LETTER U WITH ACUTE]
+"\u00FA" => "u"
+
+# û  [LATIN SMALL LETTER U WITH CIRCUMFLEX]
+"\u00FB" => "u"
+
+# ü  [LATIN SMALL LETTER U WITH DIAERESIS]
+"\u00FC" => "u"
+
+# ũ  [LATIN SMALL LETTER U WITH TILDE]
+"\u0169" => "u"
+
+# ū  [LATIN SMALL LETTER U WITH MACRON]
+"\u016B" => "u"
+
+# ŭ  [LATIN SMALL LETTER U WITH BREVE]
+"\u016D" => "u"
+
+# ů  [LATIN SMALL LETTER U WITH RING ABOVE]
+"\u016F" => "u"
+
+# ű  [LATIN SMALL LETTER U WITH DOUBLE ACUTE]
+"\u0171" => "u"
+
+# ų  [LATIN SMALL LETTER U WITH OGONEK]
+"\u0173" => "u"
+
+# ư  [LATIN SMALL LETTER U WITH HORN]
+"\u01B0" => "u"
+
+# ǔ  [LATIN SMALL LETTER U WITH CARON]
+"\u01D4" => "u"
+
+# ǖ  [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON]
+"\u01D6" => "u"
+
+# ǘ  [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE]
+"\u01D8" => "u"
+
+# ǚ  [LATIN SMALL LETTER U WITH DIAERESIS AND CARON]
+"\u01DA" => "u"
+
+# ǜ  [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE]
+"\u01DC" => "u"
+
+# ȕ  [LATIN SMALL LETTER U WITH DOUBLE GRAVE]
+"\u0215" => "u"
+
+# ȗ  [LATIN SMALL LETTER U WITH INVERTED BREVE]
+"\u0217" => "u"
+
+# ʉ  [LATIN SMALL LETTER U BAR]
+"\u0289" => "u"
+
+# ᵤ  [LATIN SUBSCRIPT SMALL LETTER U]
+"\u1D64" => "u"
+
+# ᶙ  [LATIN SMALL LETTER U WITH RETROFLEX HOOK]
+"\u1D99" => "u"
+
+# ṳ  [LATIN SMALL LETTER U WITH DIAERESIS BELOW]
+"\u1E73" => "u"
+
+# ṵ  [LATIN SMALL LETTER U WITH TILDE BELOW]
+"\u1E75" => "u"
+
+# ṷ  [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW]
+"\u1E77" => "u"
+
+# ṹ  [LATIN SMALL LETTER U WITH TILDE AND ACUTE]
+"\u1E79" => "u"
+
+# ṻ  [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS]
+"\u1E7B" => "u"
+
+# ụ  [LATIN SMALL LETTER U WITH DOT BELOW]
+"\u1EE5" => "u"
+
+# ủ  [LATIN SMALL LETTER U WITH HOOK ABOVE]
+"\u1EE7" => "u"
+
+# ứ  [LATIN SMALL LETTER U WITH HORN AND ACUTE]
+"\u1EE9" => "u"
+
+# ừ  [LATIN SMALL LETTER U WITH HORN AND GRAVE]
+"\u1EEB" => "u"
+
+# ử  [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE]
+"\u1EED" => "u"
+
+# ữ  [LATIN SMALL LETTER U WITH HORN AND TILDE]
+"\u1EEF" => "u"
+
+# ự  [LATIN SMALL LETTER U WITH HORN AND DOT BELOW]
+"\u1EF1" => "u"
+
+# ⓤ  [CIRCLED LATIN SMALL LETTER U]
+"\u24E4" => "u"
+
+# ｕ  [FULLWIDTH LATIN SMALL LETTER U]
+"\uFF55" => "u"
+
+# ⒰  [PARENTHESIZED LATIN SMALL LETTER U]
+"\u24B0" => "(u)"
+
+# ᵫ  [LATIN SMALL LETTER UE]
+"\u1D6B" => "ue"
+
+# Ʋ  [LATIN CAPITAL LETTER V WITH HOOK]
+"\u01B2" => "V"
+
+# Ʌ  [LATIN CAPITAL LETTER TURNED V]
+"\u0245" => "V"
+
+# ᴠ  [LATIN LETTER SMALL CAPITAL V]
+"\u1D20" => "V"
+
+# Ṽ  [LATIN CAPITAL LETTER V WITH TILDE]
+"\u1E7C" => "V"
+
+# Ṿ  [LATIN CAPITAL LETTER V WITH DOT BELOW]
+"\u1E7E" => "V"
+
+# Ỽ  [LATIN CAPITAL LETTER MIDDLE-WELSH V]
+"\u1EFC" => "V"
+
+# Ⓥ  [CIRCLED LATIN CAPITAL LETTER V]
+"\u24CB" => "V"
+
+# Ꝟ  [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE]
+"\uA75E" => "V"
+
+# Ꝩ  [LATIN CAPITAL LETTER VEND]
+"\uA768" => "V"
+
+# Ｖ  [FULLWIDTH LATIN CAPITAL LETTER V]
+"\uFF36" => "V"
+
+# ʋ  [LATIN SMALL LETTER V WITH HOOK]
+"\u028B" => "v"
+
+# ʌ  [LATIN SMALL LETTER TURNED V]
+"\u028C" => "v"
+
+# ᵥ  [LATIN SUBSCRIPT SMALL LETTER V]
+"\u1D65" => "v"
+
+# ᶌ  [LATIN SMALL LETTER V WITH PALATAL HOOK]
+"\u1D8C" => "v"
+
+# ṽ  [LATIN SMALL LETTER V WITH TILDE]
+"\u1E7D" => "v"
+
+# ṿ  [LATIN SMALL LETTER V WITH DOT BELOW]
+"\u1E7F" => "v"
+
+# ⓥ  [CIRCLED LATIN SMALL LETTER V]
+"\u24E5" => "v"
+
+# ⱱ  [LATIN SMALL LETTER V WITH RIGHT HOOK]
+"\u2C71" => "v"
+
+# ⱴ  [LATIN SMALL LETTER V WITH CURL]
+"\u2C74" => "v"
+
+# ꝟ  [LATIN SMALL LETTER V WITH DIAGONAL STROKE]
+"\uA75F" => "v"
+
+# ｖ  [FULLWIDTH LATIN SMALL LETTER V]
+"\uFF56" => "v"
+
+# Ꝡ  [LATIN CAPITAL LETTER VY]
+"\uA760" => "VY"
+
+# ⒱  [PARENTHESIZED LATIN SMALL LETTER V]
+"\u24B1" => "(v)"
+
+# ꝡ  [LATIN SMALL LETTER VY]
+"\uA761" => "vy"
+
+# Ŵ  [LATIN CAPITAL LETTER W WITH CIRCUMFLEX]
+"\u0174" => "W"
+
+# Ƿ  http://en.wikipedia.org/wiki/Wynn  [LATIN CAPITAL LETTER WYNN]
+"\u01F7" => "W"
+
+# ᴡ  [LATIN LETTER SMALL CAPITAL W]
+"\u1D21" => "W"
+
+# Ẁ  [LATIN CAPITAL LETTER W WITH GRAVE]
+"\u1E80" => "W"
+
+# Ẃ  [LATIN CAPITAL LETTER W WITH ACUTE]
+"\u1E82" => "W"
+
+# Ẅ  [LATIN CAPITAL LETTER W WITH DIAERESIS]
+"\u1E84" => "W"
+
+# Ẇ  [LATIN CAPITAL LETTER W WITH DOT ABOVE]
+"\u1E86" => "W"
+
+# Ẉ  [LATIN CAPITAL LETTER W WITH DOT BELOW]
+"\u1E88" => "W"
+
+# Ⓦ  [CIRCLED LATIN CAPITAL LETTER W]
+"\u24CC" => "W"
+
+# Ⱳ  [LATIN CAPITAL LETTER W WITH HOOK]
+"\u2C72" => "W"
+
+# Ｗ  [FULLWIDTH LATIN CAPITAL LETTER W]
+"\uFF37" => "W"
+
+# ŵ  [LATIN SMALL LETTER W WITH CIRCUMFLEX]
+"\u0175" => "w"
+
+# ƿ  http://en.wikipedia.org/wiki/Wynn  [LATIN LETTER WYNN]
+"\u01BF" => "w"
+
+# ʍ  [LATIN SMALL LETTER TURNED W]
+"\u028D" => "w"
+
+# ẁ  [LATIN SMALL LETTER W WITH GRAVE]
+"\u1E81" => "w"
+
+# ẃ  [LATIN SMALL LETTER W WITH ACUTE]
+"\u1E83" => "w"
+
+# ẅ  [LATIN SMALL LETTER W WITH DIAERESIS]
+"\u1E85" => "w"
+
+# ẇ  [LATIN SMALL LETTER W WITH DOT ABOVE]
+"\u1E87" => "w"
+
+# ẉ  [LATIN SMALL LETTER W WITH DOT BELOW]
+"\u1E89" => "w"
+
+# ẘ  [LATIN SMALL LETTER W WITH RING ABOVE]
+"\u1E98" => "w"
+
+# ⓦ  [CIRCLED LATIN SMALL LETTER W]
+"\u24E6" => "w"
+
+# ⱳ  [LATIN SMALL LETTER W WITH HOOK]
+"\u2C73" => "w"
+
+# ｗ  [FULLWIDTH LATIN SMALL LETTER W]
+"\uFF57" => "w"
+
+# ⒲  [PARENTHESIZED LATIN SMALL LETTER W]
+"\u24B2" => "(w)"
+
+# Ẋ  [LATIN CAPITAL LETTER X WITH DOT ABOVE]
+"\u1E8A" => "X"
+
+# Ẍ  [LATIN CAPITAL LETTER X WITH DIAERESIS]
+"\u1E8C" => "X"
+
+# Ⓧ  [CIRCLED LATIN CAPITAL LETTER X]
+"\u24CD" => "X"
+
+# Ｘ  [FULLWIDTH LATIN CAPITAL LETTER X]
+"\uFF38" => "X"
+
+# ᶍ  [LATIN SMALL LETTER X WITH PALATAL HOOK]
+"\u1D8D" => "x"
+
+# ẋ  [LATIN SMALL LETTER X WITH DOT ABOVE]
+"\u1E8B" => "x"
+
+# ẍ  [LATIN SMALL LETTER X WITH DIAERESIS]
+"\u1E8D" => "x"
+
+# ₓ  [LATIN SUBSCRIPT SMALL LETTER X]
+"\u2093" => "x"
+
+# ⓧ  [CIRCLED LATIN SMALL LETTER X]
+"\u24E7" => "x"
+
+# ｘ  [FULLWIDTH LATIN SMALL LETTER X]
+"\uFF58" => "x"
+
+# ⒳  [PARENTHESIZED LATIN SMALL LETTER X]
+"\u24B3" => "(x)"
+
+# Ý  [LATIN CAPITAL LETTER Y WITH ACUTE]
+"\u00DD" => "Y"
+
+# Ŷ  [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX]
+"\u0176" => "Y"
+
+# Ÿ  [LATIN CAPITAL LETTER Y WITH DIAERESIS]
+"\u0178" => "Y"
+
+# Ƴ  [LATIN CAPITAL LETTER Y WITH HOOK]
+"\u01B3" => "Y"
+
+# Ȳ  [LATIN CAPITAL LETTER Y WITH MACRON]
+"\u0232" => "Y"
+
+# Ɏ  [LATIN CAPITAL LETTER Y WITH STROKE]
+"\u024E" => "Y"
+
+# ʏ  [LATIN LETTER SMALL CAPITAL Y]
+"\u028F" => "Y"
+
+# Ẏ  [LATIN CAPITAL LETTER Y WITH DOT ABOVE]
+"\u1E8E" => "Y"
+
+# Ỳ  [LATIN CAPITAL LETTER Y WITH GRAVE]
+"\u1EF2" => "Y"
+
+# Ỵ  [LATIN CAPITAL LETTER Y WITH DOT BELOW]
+"\u1EF4" => "Y"
+
+# Ỷ  [LATIN CAPITAL LETTER Y WITH HOOK ABOVE]
+"\u1EF6" => "Y"
+
+# Ỹ  [LATIN CAPITAL LETTER Y WITH TILDE]
+"\u1EF8" => "Y"
+
+# Ỿ  [LATIN CAPITAL LETTER Y WITH LOOP]
+"\u1EFE" => "Y"
+
+# Ⓨ  [CIRCLED LATIN CAPITAL LETTER Y]
+"\u24CE" => "Y"
+
+# Ｙ  [FULLWIDTH LATIN CAPITAL LETTER Y]
+"\uFF39" => "Y"
+
+# ý  [LATIN SMALL LETTER Y WITH ACUTE]
+"\u00FD" => "y"
+
+# ÿ  [LATIN SMALL LETTER Y WITH DIAERESIS]
+"\u00FF" => "y"
+
+# ŷ  [LATIN SMALL LETTER Y WITH CIRCUMFLEX]
+"\u0177" => "y"
+
+# ƴ  [LATIN SMALL LETTER Y WITH HOOK]
+"\u01B4" => "y"
+
+# ȳ  [LATIN SMALL LETTER Y WITH MACRON]
+"\u0233" => "y"
+
+# ɏ  [LATIN SMALL LETTER Y WITH STROKE]
+"\u024F" => "y"
+
+# ʎ  [LATIN SMALL LETTER TURNED Y]
+"\u028E" => "y"
+
+# ẏ  [LATIN SMALL LETTER Y WITH DOT ABOVE]
+"\u1E8F" => "y"
+
+# ẙ  [LATIN SMALL LETTER Y WITH RING ABOVE]
+"\u1E99" => "y"
+
+# ỳ  [LATIN SMALL LETTER Y WITH GRAVE]
+"\u1EF3" => "y"
+
+# ỵ  [LATIN SMALL LETTER Y WITH DOT BELOW]
+"\u1EF5" => "y"
+
+# ỷ  [LATIN SMALL LETTER Y WITH HOOK ABOVE]
+"\u1EF7" => "y"
+
+# ỹ  [LATIN SMALL LETTER Y WITH TILDE]
+"\u1EF9" => "y"
+
+# ỿ  [LATIN SMALL LETTER Y WITH LOOP]
+"\u1EFF" => "y"
+
+# ⓨ  [CIRCLED LATIN SMALL LETTER Y]
+"\u24E8" => "y"
+
+# ｙ  [FULLWIDTH LATIN SMALL LETTER Y]
+"\uFF59" => "y"
+
+# ⒴  [PARENTHESIZED LATIN SMALL LETTER Y]
+"\u24B4" => "(y)"
+
+# Ź  [LATIN CAPITAL LETTER Z WITH ACUTE]
+"\u0179" => "Z"
+
+# Ż  [LATIN CAPITAL LETTER Z WITH DOT ABOVE]
+"\u017B" => "Z"
+
+# Ž  [LATIN CAPITAL LETTER Z WITH CARON]
+"\u017D" => "Z"
+
+# Ƶ  [LATIN CAPITAL LETTER Z WITH STROKE]
+"\u01B5" => "Z"
+
+# Ȝ  http://en.wikipedia.org/wiki/Yogh  [LATIN CAPITAL LETTER YOGH]
+"\u021C" => "Z"
+
+# Ȥ  [LATIN CAPITAL LETTER Z WITH HOOK]
+"\u0224" => "Z"
+
+# ᴢ  [LATIN LETTER SMALL CAPITAL Z]
+"\u1D22" => "Z"
+
+# Ẑ  [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX]
+"\u1E90" => "Z"
+
+# Ẓ  [LATIN CAPITAL LETTER Z WITH DOT BELOW]
+"\u1E92" => "Z"
+
+# Ẕ  [LATIN CAPITAL LETTER Z WITH LINE BELOW]
+"\u1E94" => "Z"
+
+# Ⓩ  [CIRCLED LATIN CAPITAL LETTER Z]
+"\u24CF" => "Z"
+
+# Ⱬ  [LATIN CAPITAL LETTER Z WITH DESCENDER]
+"\u2C6B" => "Z"
+
+# Ꝣ  [LATIN CAPITAL LETTER VISIGOTHIC Z]
+"\uA762" => "Z"
+
+# Ｚ  [FULLWIDTH LATIN CAPITAL LETTER Z]
+"\uFF3A" => "Z"
+
+# ź  [LATIN SMALL LETTER Z WITH ACUTE]
+"\u017A" => "z"
+
+# ż  [LATIN SMALL LETTER Z WITH DOT ABOVE]
+"\u017C" => "z"
+
+# ž  [LATIN SMALL LETTER Z WITH CARON]
+"\u017E" => "z"
+
+# ƶ  [LATIN SMALL LETTER Z WITH STROKE]
+"\u01B6" => "z"
+
+# ȝ  http://en.wikipedia.org/wiki/Yogh  [LATIN SMALL LETTER YOGH]
+"\u021D" => "z"
+
+# ȥ  [LATIN SMALL LETTER Z WITH HOOK]
+"\u0225" => "z"
+
+# ɀ  [LATIN SMALL LETTER Z WITH SWASH TAIL]
+"\u0240" => "z"
+
+# ʐ  [LATIN SMALL LETTER Z WITH RETROFLEX HOOK]
+"\u0290" => "z"
+
+# ʑ  [LATIN SMALL LETTER Z WITH CURL]
+"\u0291" => "z"
+
+# ᵶ  [LATIN SMALL LETTER Z WITH MIDDLE TILDE]
+"\u1D76" => "z"
+
+# ᶎ  [LATIN SMALL LETTER Z WITH PALATAL HOOK]
+"\u1D8E" => "z"
+
+# ẑ  [LATIN SMALL LETTER Z WITH CIRCUMFLEX]
+"\u1E91" => "z"
+
+# ẓ  [LATIN SMALL LETTER Z WITH DOT BELOW]
+"\u1E93" => "z"
+
+# ẕ  [LATIN SMALL LETTER Z WITH LINE BELOW]
+"\u1E95" => "z"
+
+# ⓩ  [CIRCLED LATIN SMALL LETTER Z]
+"\u24E9" => "z"
+
+# ⱬ  [LATIN SMALL LETTER Z WITH DESCENDER]
+"\u2C6C" => "z"
+
+# ꝣ  [LATIN SMALL LETTER VISIGOTHIC Z]
+"\uA763" => "z"
+
+# ｚ  [FULLWIDTH LATIN SMALL LETTER Z]
+"\uFF5A" => "z"
+
+# ⒵  [PARENTHESIZED LATIN SMALL LETTER Z]
+"\u24B5" => "(z)"
+
+# ⁰  [SUPERSCRIPT ZERO]
+"\u2070" => "0"
+
+# ₀  [SUBSCRIPT ZERO]
+"\u2080" => "0"
+
+# ⓪  [CIRCLED DIGIT ZERO]
+"\u24EA" => "0"
+
+# ⓿  [NEGATIVE CIRCLED DIGIT ZERO]
+"\u24FF" => "0"
+
+# ０  [FULLWIDTH DIGIT ZERO]
+"\uFF10" => "0"
+
+# ¹  [SUPERSCRIPT ONE]
+"\u00B9" => "1"
+
+# ₁  [SUBSCRIPT ONE]
+"\u2081" => "1"
+
+# ①  [CIRCLED DIGIT ONE]
+"\u2460" => "1"
+
+# ⓵  [DOUBLE CIRCLED DIGIT ONE]
+"\u24F5" => "1"
+
+# ❶  [DINGBAT NEGATIVE CIRCLED DIGIT ONE]
+"\u2776" => "1"
+
+# ➀  [DINGBAT CIRCLED SANS-SERIF DIGIT ONE]
+"\u2780" => "1"
+
+# ➊  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE]
+"\u278A" => "1"
+
+# １  [FULLWIDTH DIGIT ONE]
+"\uFF11" => "1"
+
+# ⒈  [DIGIT ONE FULL STOP]
+"\u2488" => "1."
+
+# ⑴  [PARENTHESIZED DIGIT ONE]
+"\u2474" => "(1)"
+
+# ²  [SUPERSCRIPT TWO]
+"\u00B2" => "2"
+
+# ₂  [SUBSCRIPT TWO]
+"\u2082" => "2"
+
+# ②  [CIRCLED DIGIT TWO]
+"\u2461" => "2"
+
+# ⓶  [DOUBLE CIRCLED DIGIT TWO]
+"\u24F6" => "2"
+
+# ❷  [DINGBAT NEGATIVE CIRCLED DIGIT TWO]
+"\u2777" => "2"
+
+# ➁  [DINGBAT CIRCLED SANS-SERIF DIGIT TWO]
+"\u2781" => "2"
+
+# ➋  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO]
+"\u278B" => "2"
+
+# ２  [FULLWIDTH DIGIT TWO]
+"\uFF12" => "2"
+
+# ⒉  [DIGIT TWO FULL STOP]
+"\u2489" => "2."
+
+# ⑵  [PARENTHESIZED DIGIT TWO]
+"\u2475" => "(2)"
+
+# ³  [SUPERSCRIPT THREE]
+"\u00B3" => "3"
+
+# ₃  [SUBSCRIPT THREE]
+"\u2083" => "3"
+
+# ③  [CIRCLED DIGIT THREE]
+"\u2462" => "3"
+
+# ⓷  [DOUBLE CIRCLED DIGIT THREE]
+"\u24F7" => "3"
+
+# ❸  [DINGBAT NEGATIVE CIRCLED DIGIT THREE]
+"\u2778" => "3"
+
+# ➂  [DINGBAT CIRCLED SANS-SERIF DIGIT THREE]
+"\u2782" => "3"
+
+# ➌  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE]
+"\u278C" => "3"
+
+# ３  [FULLWIDTH DIGIT THREE]
+"\uFF13" => "3"
+
+# ⒊  [DIGIT THREE FULL STOP]
+"\u248A" => "3."
+
+# ⑶  [PARENTHESIZED DIGIT THREE]
+"\u2476" => "(3)"
+
+# ⁴  [SUPERSCRIPT FOUR]
+"\u2074" => "4"
+
+# ₄  [SUBSCRIPT FOUR]
+"\u2084" => "4"
+
+# ④  [CIRCLED DIGIT FOUR]
+"\u2463" => "4"
+
+# ⓸  [DOUBLE CIRCLED DIGIT FOUR]
+"\u24F8" => "4"
+
+# ❹  [DINGBAT NEGATIVE CIRCLED DIGIT FOUR]
+"\u2779" => "4"
+
+# ➃  [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR]
+"\u2783" => "4"
+
+# ➍  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR]
+"\u278D" => "4"
+
+# ４  [FULLWIDTH DIGIT FOUR]
+"\uFF14" => "4"
+
+# ⒋  [DIGIT FOUR FULL STOP]
+"\u248B" => "4."
+
+# ⑷  [PARENTHESIZED DIGIT FOUR]
+"\u2477" => "(4)"
+
+# ⁵  [SUPERSCRIPT FIVE]
+"\u2075" => "5"
+
+# ₅  [SUBSCRIPT FIVE]
+"\u2085" => "5"
+
+# ⑤  [CIRCLED DIGIT FIVE]
+"\u2464" => "5"
+
+# ⓹  [DOUBLE CIRCLED DIGIT FIVE]
+"\u24F9" => "5"
+
+# ❺  [DINGBAT NEGATIVE CIRCLED DIGIT FIVE]
+"\u277A" => "5"
+
+# ➄  [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE]
+"\u2784" => "5"
+
+# ➎  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE]
+"\u278E" => "5"
+
+# ５  [FULLWIDTH DIGIT FIVE]
+"\uFF15" => "5"
+
+# ⒌  [DIGIT FIVE FULL STOP]
+"\u248C" => "5."
+
+# ⑸  [PARENTHESIZED DIGIT FIVE]
+"\u2478" => "(5)"
+
+# ⁶  [SUPERSCRIPT SIX]
+"\u2076" => "6"
+
+# ₆  [SUBSCRIPT SIX]
+"\u2086" => "6"
+
+# ⑥  [CIRCLED DIGIT SIX]
+"\u2465" => "6"
+
+# ⓺  [DOUBLE CIRCLED DIGIT SIX]
+"\u24FA" => "6"
+
+# ❻  [DINGBAT NEGATIVE CIRCLED DIGIT SIX]
+"\u277B" => "6"
+
+# ➅  [DINGBAT CIRCLED SANS-SERIF DIGIT SIX]
+"\u2785" => "6"
+
+# ➏  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX]
+"\u278F" => "6"
+
+# ６  [FULLWIDTH DIGIT SIX]
+"\uFF16" => "6"
+
+# ⒍  [DIGIT SIX FULL STOP]
+"\u248D" => "6."
+
+# ⑹  [PARENTHESIZED DIGIT SIX]
+"\u2479" => "(6)"
+
+# ⁷  [SUPERSCRIPT SEVEN]
+"\u2077" => "7"
+
+# ₇  [SUBSCRIPT SEVEN]
+"\u2087" => "7"
+
+# ⑦  [CIRCLED DIGIT SEVEN]
+"\u2466" => "7"
+
+# ⓻  [DOUBLE CIRCLED DIGIT SEVEN]
+"\u24FB" => "7"
+
+# ❼  [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN]
+"\u277C" => "7"
+
+# ➆  [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN]
+"\u2786" => "7"
+
+# ➐  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN]
+"\u2790" => "7"
+
+# ７  [FULLWIDTH DIGIT SEVEN]
+"\uFF17" => "7"
+
+# ⒎  [DIGIT SEVEN FULL STOP]
+"\u248E" => "7."
+
+# ⑺  [PARENTHESIZED DIGIT SEVEN]
+"\u247A" => "(7)"
+
+# ⁸  [SUPERSCRIPT EIGHT]
+"\u2078" => "8"
+
+# ₈  [SUBSCRIPT EIGHT]
+"\u2088" => "8"
+
+# ⑧  [CIRCLED DIGIT EIGHT]
+"\u2467" => "8"
+
+# ⓼  [DOUBLE CIRCLED DIGIT EIGHT]
+"\u24FC" => "8"
+
+# ❽  [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT]
+"\u277D" => "8"
+
+# ➇  [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT]
+"\u2787" => "8"
+
+# ➑  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT]
+"\u2791" => "8"
+
+# ８  [FULLWIDTH DIGIT EIGHT]
+"\uFF18" => "8"
+
+# ⒏  [DIGIT EIGHT FULL STOP]
+"\u248F" => "8."
+
+# ⑻  [PARENTHESIZED DIGIT EIGHT]
+"\u247B" => "(8)"
+
+# ⁹  [SUPERSCRIPT NINE]
+"\u2079" => "9"
+
+# ₉  [SUBSCRIPT NINE]
+"\u2089" => "9"
+
+# ⑨  [CIRCLED DIGIT NINE]
+"\u2468" => "9"
+
+# ⓽  [DOUBLE CIRCLED DIGIT NINE]
+"\u24FD" => "9"
+
+# ❾  [DINGBAT NEGATIVE CIRCLED DIGIT NINE]
+"\u277E" => "9"
+
+# ➈  [DINGBAT CIRCLED SANS-SERIF DIGIT NINE]
+"\u2788" => "9"
+
+# ➒  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE]
+"\u2792" => "9"
+
+# ９  [FULLWIDTH DIGIT NINE]
+"\uFF19" => "9"
+
+# ⒐  [DIGIT NINE FULL STOP]
+"\u2490" => "9."
+
+# ⑼  [PARENTHESIZED DIGIT NINE]
+"\u247C" => "(9)"
+
+# ⑩  [CIRCLED NUMBER TEN]
+"\u2469" => "10"
+
+# ⓾  [DOUBLE CIRCLED NUMBER TEN]
+"\u24FE" => "10"
+
+# ❿  [DINGBAT NEGATIVE CIRCLED NUMBER TEN]
+"\u277F" => "10"
+
+# ➉  [DINGBAT CIRCLED SANS-SERIF NUMBER TEN]
+"\u2789" => "10"
+
+# ➓  [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN]
+"\u2793" => "10"
+
+# ⒑  [NUMBER TEN FULL STOP]
+"\u2491" => "10."
+
+# ⑽  [PARENTHESIZED NUMBER TEN]
+"\u247D" => "(10)"
+
+# ⑪  [CIRCLED NUMBER ELEVEN]
+"\u246A" => "11"
+
+# ⓫  [NEGATIVE CIRCLED NUMBER ELEVEN]
+"\u24EB" => "11"
+
+# ⒒  [NUMBER ELEVEN FULL STOP]
+"\u2492" => "11."
+
+# ⑾  [PARENTHESIZED NUMBER ELEVEN]
+"\u247E" => "(11)"
+
+# ⑫  [CIRCLED NUMBER TWELVE]
+"\u246B" => "12"
+
+# ⓬  [NEGATIVE CIRCLED NUMBER TWELVE]
+"\u24EC" => "12"
+
+# ⒓  [NUMBER TWELVE FULL STOP]
+"\u2493" => "12."
+
+# ⑿  [PARENTHESIZED NUMBER TWELVE]
+"\u247F" => "(12)"
+
+# ⑬  [CIRCLED NUMBER THIRTEEN]
+"\u246C" => "13"
+
+# ⓭  [NEGATIVE CIRCLED NUMBER THIRTEEN]
+"\u24ED" => "13"
+
+# ⒔  [NUMBER THIRTEEN FULL STOP]
+"\u2494" => "13."
+
+# ⒀  [PARENTHESIZED NUMBER THIRTEEN]
+"\u2480" => "(13)"
+
+# ⑭  [CIRCLED NUMBER FOURTEEN]
+"\u246D" => "14"
+
+# ⓮  [NEGATIVE CIRCLED NUMBER FOURTEEN]
+"\u24EE" => "14"
+
+# ⒕  [NUMBER FOURTEEN FULL STOP]
+"\u2495" => "14."
+
+# ⒁  [PARENTHESIZED NUMBER FOURTEEN]
+"\u2481" => "(14)"
+
+# ⑮  [CIRCLED NUMBER FIFTEEN]
+"\u246E" => "15"
+
+# ⓯  [NEGATIVE CIRCLED NUMBER FIFTEEN]
+"\u24EF" => "15"
+
+# ⒖  [NUMBER FIFTEEN FULL STOP]
+"\u2496" => "15."
+
+# ⒂  [PARENTHESIZED NUMBER FIFTEEN]
+"\u2482" => "(15)"
+
+# ⑯  [CIRCLED NUMBER SIXTEEN]
+"\u246F" => "16"
+
+# ⓰  [NEGATIVE CIRCLED NUMBER SIXTEEN]
+"\u24F0" => "16"
+
+# ⒗  [NUMBER SIXTEEN FULL STOP]
+"\u2497" => "16."
+
+# ⒃  [PARENTHESIZED NUMBER SIXTEEN]
+"\u2483" => "(16)"
+
+# ⑰  [CIRCLED NUMBER SEVENTEEN]
+"\u2470" => "17"
+
+# ⓱  [NEGATIVE CIRCLED NUMBER SEVENTEEN]
+"\u24F1" => "17"
+
+# ⒘  [NUMBER SEVENTEEN FULL STOP]
+"\u2498" => "17."
+
+# ⒄  [PARENTHESIZED NUMBER SEVENTEEN]
+"\u2484" => "(17)"
+
+# ⑱  [CIRCLED NUMBER EIGHTEEN]
+"\u2471" => "18"
+
+# ⓲  [NEGATIVE CIRCLED NUMBER EIGHTEEN]
+"\u24F2" => "18"
+
+# ⒙  [NUMBER EIGHTEEN FULL STOP]
+"\u2499" => "18."
+
+# ⒅  [PARENTHESIZED NUMBER EIGHTEEN]
+"\u2485" => "(18)"
+
+# ⑲  [CIRCLED NUMBER NINETEEN]
+"\u2472" => "19"
+
+# ⓳  [NEGATIVE CIRCLED NUMBER NINETEEN]
+"\u24F3" => "19"
+
+# ⒚  [NUMBER NINETEEN FULL STOP]
+"\u249A" => "19."
+
+# ⒆  [PARENTHESIZED NUMBER NINETEEN]
+"\u2486" => "(19)"
+
+# ⑳  [CIRCLED NUMBER TWENTY]
+"\u2473" => "20"
+
+# ⓴  [NEGATIVE CIRCLED NUMBER TWENTY]
+"\u24F4" => "20"
+
+# ⒛  [NUMBER TWENTY FULL STOP]
+"\u249B" => "20."
+
+# ⒇  [PARENTHESIZED NUMBER TWENTY]
+"\u2487" => "(20)"
+
+# «  [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK]
+"\u00AB" => "\""
+
+# »  [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK]
+"\u00BB" => "\""
+
+# “  [LEFT DOUBLE QUOTATION MARK]
+"\u201C" => "\""
+
+# ”  [RIGHT DOUBLE QUOTATION MARK]
+"\u201D" => "\""
+
+# „  [DOUBLE LOW-9 QUOTATION MARK]
+"\u201E" => "\""
+
+# ″  [DOUBLE PRIME]
+"\u2033" => "\""
+
+# ‶  [REVERSED DOUBLE PRIME]
+"\u2036" => "\""
+
+# ❝  [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT]
+"\u275D" => "\""
+
+# ❞  [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT]
+"\u275E" => "\""
+
+# ❮  [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT]
+"\u276E" => "\""
+
+# ❯  [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT]
+"\u276F" => "\""
+
+# ＂  [FULLWIDTH QUOTATION MARK]
+"\uFF02" => "\""
+
+# ‘  [LEFT SINGLE QUOTATION MARK]
+"\u2018" => "\'"
+
+# ’  [RIGHT SINGLE QUOTATION MARK]
+"\u2019" => "\'"
+
+# ‚  [SINGLE LOW-9 QUOTATION MARK]
+"\u201A" => "\'"
+
+# ‛  [SINGLE HIGH-REVERSED-9 QUOTATION MARK]
+"\u201B" => "\'"
+
+# ′  [PRIME]
+"\u2032" => "\'"
+
+# ‵  [REVERSED PRIME]
+"\u2035" => "\'"
+
+# ‹  [SINGLE LEFT-POINTING ANGLE QUOTATION MARK]
+"\u2039" => "\'"
+
+# ›  [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK]
+"\u203A" => "\'"
+
+# ❛  [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT]
+"\u275B" => "\'"
+
+# ❜  [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT]
+"\u275C" => "\'"
+
+# ＇  [FULLWIDTH APOSTROPHE]
+"\uFF07" => "\'"
+
+# ‐  [HYPHEN]
+"\u2010" => "-"
+
+# ‑  [NON-BREAKING HYPHEN]
+"\u2011" => "-"
+
+# ‒  [FIGURE DASH]
+"\u2012" => "-"
+
+# –  [EN DASH]
+"\u2013" => "-"
+
+# —  [EM DASH]
+"\u2014" => "-"
+
+# ⁻  [SUPERSCRIPT MINUS]
+"\u207B" => "-"
+
+# ₋  [SUBSCRIPT MINUS]
+"\u208B" => "-"
+
+# －  [FULLWIDTH HYPHEN-MINUS]
+"\uFF0D" => "-"
+
+# ⁅  [LEFT SQUARE BRACKET WITH QUILL]
+"\u2045" => "["
+
+# ❲  [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT]
+"\u2772" => "["
+
+# ［  [FULLWIDTH LEFT SQUARE BRACKET]
+"\uFF3B" => "["
+
+# ⁆  [RIGHT SQUARE BRACKET WITH QUILL]
+"\u2046" => "]"
+
+# ❳  [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT]
+"\u2773" => "]"
+
+# ］  [FULLWIDTH RIGHT SQUARE BRACKET]
+"\uFF3D" => "]"
+
+# ⁽  [SUPERSCRIPT LEFT PARENTHESIS]
+"\u207D" => "("
+
+# ₍  [SUBSCRIPT LEFT PARENTHESIS]
+"\u208D" => "("
+
+# ❨  [MEDIUM LEFT PARENTHESIS ORNAMENT]
+"\u2768" => "("
+
+# ❪  [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT]
+"\u276A" => "("
+
+# （  [FULLWIDTH LEFT PARENTHESIS]
+"\uFF08" => "("
+
+# ⸨  [LEFT DOUBLE PARENTHESIS]
+"\u2E28" => "(("
+
+# ⁾  [SUPERSCRIPT RIGHT PARENTHESIS]
+"\u207E" => ")"
+
+# ₎  [SUBSCRIPT RIGHT PARENTHESIS]
+"\u208E" => ")"
+
+# ❩  [MEDIUM RIGHT PARENTHESIS ORNAMENT]
+"\u2769" => ")"
+
+# ❫  [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT]
+"\u276B" => ")"
+
+# ）  [FULLWIDTH RIGHT PARENTHESIS]
+"\uFF09" => ")"
+
+# ⸩  [RIGHT DOUBLE PARENTHESIS]
+"\u2E29" => "))"
+
+# ❬  [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT]
+"\u276C" => "<"
+
+# ❰  [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT]
+"\u2770" => "<"
+
+# ＜  [FULLWIDTH LESS-THAN SIGN]
+"\uFF1C" => "<"
+
+# ❭  [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT]
+"\u276D" => ">"
+
+# ❱  [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT]
+"\u2771" => ">"
+
+# ＞  [FULLWIDTH GREATER-THAN SIGN]
+"\uFF1E" => ">"
+
+# ❴  [MEDIUM LEFT CURLY BRACKET ORNAMENT]
+"\u2774" => "{"
+
+# ｛  [FULLWIDTH LEFT CURLY BRACKET]
+"\uFF5B" => "{"
+
+# ❵  [MEDIUM RIGHT CURLY BRACKET ORNAMENT]
+"\u2775" => "}"
+
+# ｝  [FULLWIDTH RIGHT CURLY BRACKET]
+"\uFF5D" => "}"
+
+# ⁺  [SUPERSCRIPT PLUS SIGN]
+"\u207A" => "+"
+
+# ₊  [SUBSCRIPT PLUS SIGN]
+"\u208A" => "+"
+
+# ＋  [FULLWIDTH PLUS SIGN]
+"\uFF0B" => "+"
+
+# ⁼  [SUPERSCRIPT EQUALS SIGN]
+"\u207C" => "="
+
+# ₌  [SUBSCRIPT EQUALS SIGN]
+"\u208C" => "="
+
+# ＝  [FULLWIDTH EQUALS SIGN]
+"\uFF1D" => "="
+
+# ！  [FULLWIDTH EXCLAMATION MARK]
+"\uFF01" => "!"
+
+# ‼  [DOUBLE EXCLAMATION MARK]
+"\u203C" => "!!"
+
+# ⁉  [EXCLAMATION QUESTION MARK]
+"\u2049" => "!?"
+
+# ＃  [FULLWIDTH NUMBER SIGN]
+"\uFF03" => "#"
+
+# ＄  [FULLWIDTH DOLLAR SIGN]
+"\uFF04" => "$"
+
+# ⁒  [COMMERCIAL MINUS SIGN]
+"\u2052" => "%"
+
+# ％  [FULLWIDTH PERCENT SIGN]
+"\uFF05" => "%"
+
+# ＆  [FULLWIDTH AMPERSAND]
+"\uFF06" => "&"
+
+# ⁎  [LOW ASTERISK]
+"\u204E" => "*"
+
+# ＊  [FULLWIDTH ASTERISK]
+"\uFF0A" => "*"
+
+# ，  [FULLWIDTH COMMA]
+"\uFF0C" => ","
+
+# ．  [FULLWIDTH FULL STOP]
+"\uFF0E" => "."
+
+# ⁄  [FRACTION SLASH]
+"\u2044" => "/"
+
+# ／  [FULLWIDTH SOLIDUS]
+"\uFF0F" => "/"
+
+# ：  [FULLWIDTH COLON]
+"\uFF1A" => ":"
+
+# ⁏  [REVERSED SEMICOLON]
+"\u204F" => ";"
+
+# ；  [FULLWIDTH SEMICOLON]
+"\uFF1B" => ";"
+
+# ？  [FULLWIDTH QUESTION MARK]
+"\uFF1F" => "?"
+
+# ⁇  [DOUBLE QUESTION MARK]
+"\u2047" => "??"
+
+# ⁈  [QUESTION EXCLAMATION MARK]
+"\u2048" => "?!"
+
+# ＠  [FULLWIDTH COMMERCIAL AT]
+"\uFF20" => "@"
+
+# ＼  [FULLWIDTH REVERSE SOLIDUS]
+"\uFF3C" => "\\"
+
+# ‸  [CARET]
+"\u2038" => "^"
+
+# ＾  [FULLWIDTH CIRCUMFLEX ACCENT]
+"\uFF3E" => "^"
+
+# ＿  [FULLWIDTH LOW LINE]
+"\uFF3F" => "_"
+
+# ⁓  [SWUNG DASH]
+"\u2053" => "~"
+
+# ～  [FULLWIDTH TILDE]
+"\uFF5E" => "~"
+
+################################################################
+# Below is the Perl script used to generate the above mappings #
+# from ASCIIFoldingFilter.java:                                #
+################################################################
+#
+# #!/usr/bin/perl
+#
+# use warnings;
+# use strict;
+# 
+# my @source_chars = ();
+# my @source_char_descriptions = ();
+# my $target = '';
+# 
+# while (<>) {
+#   if (/case\s+'(\\u[A-F0-9]+)':\s*\/\/\s*(.*)/i) {
+#     push @source_chars, $1;
+#     push @source_char_descriptions, $2;
+#     next;
+#   }
+#   if (/output\[[^\]]+\]\s*=\s*'(\\'|\\\\|.)'/) {
+#     $target .= $1;
+#     next;
+#   }
+#   if (/break;/) {
+#     $target = "\\\"" if ($target eq '"');
+#     for my $source_char_num (0..$#source_chars) {
+#       print "# $source_char_descriptions[$source_char_num]\n";
+#       print "\"$source_chars[$source_char_num]\" => \"$target\"\n\n";
+#     }
+#     @source_chars = ();
+#     @source_char_descriptions = ();
+#     $target = '';
+#   }
+# }
diff --git a/zookeeper/solr/collection1/conf/mapping-ISOLatin1Accent.txt b/zookeeper/solr/collection1/conf/mapping-ISOLatin1Accent.txt

new file mode 100644 (file)

index 0000000..ede7742
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/mapping-ISOLatin1Accent.txt
@@ -0,0 +1,246 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Syntax:
+#   "source" => "target"
+#     "source".length() > 0 (source cannot be empty.)
+#     "target".length() >= 0 (target can be empty.)
+
+# example:
+#   "À" => "A"
+#   "\u00C0" => "A"
+#   "\u00C0" => "\u0041"
+#   "ß" => "ss"
+#   "\t" => " "
+#   "\n" => ""
+
+# À => A
+"\u00C0" => "A"
+
+# Á => A
+"\u00C1" => "A"
+
+# Â => A
+"\u00C2" => "A"
+
+# Ã => A
+"\u00C3" => "A"
+
+# Ä => A
+"\u00C4" => "A"
+
+# Å => A
+"\u00C5" => "A"
+
+# Æ => AE
+"\u00C6" => "AE"
+
+# Ç => C
+"\u00C7" => "C"
+
+# È => E
+"\u00C8" => "E"
+
+# É => E
+"\u00C9" => "E"
+
+# Ê => E
+"\u00CA" => "E"
+
+# Ë => E
+"\u00CB" => "E"
+
+# Ì => I
+"\u00CC" => "I"
+
+# Í => I
+"\u00CD" => "I"
+
+# Î => I
+"\u00CE" => "I"
+
+# Ï => I
+"\u00CF" => "I"
+
+# Ĳ => IJ
+"\u0132" => "IJ"
+
+# Ð => D
+"\u00D0" => "D"
+
+# Ñ => N
+"\u00D1" => "N"
+
+# Ò => O
+"\u00D2" => "O"
+
+# Ó => O
+"\u00D3" => "O"
+
+# Ô => O
+"\u00D4" => "O"
+
+# Õ => O
+"\u00D5" => "O"
+
+# Ö => O
+"\u00D6" => "O"
+
+# Ø => O
+"\u00D8" => "O"
+
+# Œ => OE
+"\u0152" => "OE"
+
+# Þ => TH
+"\u00DE" => "TH"
+
+# Ù => U
+"\u00D9" => "U"
+
+# Ú => U
+"\u00DA" => "U"
+
+# Û => U
+"\u00DB" => "U"
+
+# Ü => U
+"\u00DC" => "U"
+
+# Ý => Y
+"\u00DD" => "Y"
+
+# Ÿ => Y
+"\u0178" => "Y"
+
+# à => a
+"\u00E0" => "a"
+
+# á => a
+"\u00E1" => "a"
+
+# â => a
+"\u00E2" => "a"
+
+# ã => a
+"\u00E3" => "a"
+
+# ä => a
+"\u00E4" => "a"
+
+# å => a
+"\u00E5" => "a"
+
+# æ => ae
+"\u00E6" => "ae"
+
+# ç => c
+"\u00E7" => "c"
+
+# è => e
+"\u00E8" => "e"
+
+# é => e
+"\u00E9" => "e"
+
+# ê => e
+"\u00EA" => "e"
+
+# ë => e
+"\u00EB" => "e"
+
+# ì => i
+"\u00EC" => "i"
+
+# í => i
+"\u00ED" => "i"
+
+# î => i
+"\u00EE" => "i"
+
+# ï => i
+"\u00EF" => "i"
+
+# ĳ => ij
+"\u0133" => "ij"
+
+# ð => d
+"\u00F0" => "d"
+
+# ñ => n
+"\u00F1" => "n"
+
+# ò => o
+"\u00F2" => "o"
+
+# ó => o
+"\u00F3" => "o"
+
+# ô => o
+"\u00F4" => "o"
+
+# õ => o
+"\u00F5" => "o"
+
+# ö => o
+"\u00F6" => "o"
+
+# ø => o
+"\u00F8" => "o"
+
+# œ => oe
+"\u0153" => "oe"
+
+# ß => ss
+"\u00DF" => "ss"
+
+# þ => th
+"\u00FE" => "th"
+
+# ù => u
+"\u00F9" => "u"
+
+# ú => u
+"\u00FA" => "u"
+
+# û => u
+"\u00FB" => "u"
+
+# ü => u
+"\u00FC" => "u"
+
+# ý => y
+"\u00FD" => "y"
+
+# ÿ => y
+"\u00FF" => "y"
+
+# ﬀ => ff
+"\uFB00" => "ff"
+
+# ﬁ => fi
+"\uFB01" => "fi"
+
+# ﬂ => fl
+"\uFB02" => "fl"
+
+# ﬃ => ffi
+"\uFB03" => "ffi"
+
+# ﬄ => ffl
+"\uFB04" => "ffl"
+
+# ﬅ => ft
+"\uFB05" => "ft"
+
+# ﬆ => st
+"\uFB06" => "st"
diff --git a/zookeeper/solr/collection1/conf/protwords.txt b/zookeeper/solr/collection1/conf/protwords.txt

new file mode 100644 (file)

index 0000000..1dfc0ab
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/protwords.txt
@@ -0,0 +1,21 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# Use a protected word file to protect against the stemmer reducing two
+# unrelated words to the same base word.
+
+# Some non-words that normally won't be encountered,
+# just to test that they won't be stemmed.
+dontstems
+zwhacky
+
diff --git a/zookeeper/solr/collection1/conf/schema.xml b/zookeeper/solr/collection1/conf/schema.xml

new file mode 100644 (file)

index 0000000..27b6dff
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/schema.xml
@@ -0,0 +1,666 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--  
+ This is the Solr schema file. This file should be named "schema.xml" and
+ should be in the conf directory under the solr home
+ (i.e. ./solr/conf/schema.xml by default) 
+ or located where the classloader for the Solr webapp can find it.
+
+ This example schema is the recommended starting point for users.
+ It should be kept correct and concise, usable out-of-the-box.
+
+ For more information on how to customize this file, please see
+ http://wiki.apache.org/solr/SchemaXml
+
+ PERFORMANCE NOTE: this schema includes many optional features and should not
+ be used for benchmarking.  To improve performance one could
+  - set stored="false" for all fields possible (esp large fields) when you
+    only need to search on the field but don't need to return the original
+    value.
+  - set indexed="false" if you don't need to search on the field, but only
+    return the field as a result of searching on other indexed fields.
+  - remove all unneeded copyField statements
+  - for best index size and searching performance, set "indexed" to false
+    for all general text fields, use copyField to copy them to the
+    catchall "text" field, and use that for searching.
+  - For maximum indexing performance, use the StreamingUpdateSolrServer
+    java client.
+  - Remember to run the JVM in server mode, and use a higher logging level
+    that avoids logging every request
+-->
+
+<schema name="Local Unified Index" version="1.2">
+  <!-- attribute "name" is the name of this schema and is only used for display purposes.
+       Applications should change this to reflect the nature of the search collection.
+       version="1.2" is Solr's version number for the schema syntax and semantics.  It should
+       not normally be changed by applications.
+       1.0: multiValued attribute did not exist, all fields are multiValued by nature
+       1.1: multiValued attribute introduced, false by default 
+       1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
+     -->
+
+  <types>
+    <!-- field type definitions. The "name" attribute is
+       just a label to be used by field definitions.  The "class"
+       attribute and any other attributes determine the real
+       behavior of the fieldType.
+         Class names starting with "solr" refer to java classes in the
+       org.apache.solr.analysis package.
+    -->
+
+    <!-- The StrField type is not analyzed, but indexed/stored verbatim.  
+       - StrField and TextField support an optional compressThreshold which
+       limits compression (if enabled in the derived fields) to values which
+       exceed a certain size (in characters).
+    -->
+    <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
+
+    <!-- boolean type: "true" or "false" -->
+    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
+    <!-- Binary data type. The data should be sent/retrieved as Base64-encoded Strings -->
+    <fieldtype name="binary" class="solr.BinaryField"/>
+
+    <!-- The optional sortMissingLast and sortMissingFirst attributes are
+         currently supported on types that are sorted internally as strings.
+              This includes "string","boolean","sint","slong","sfloat","sdouble","pdate"
+       - If sortMissingLast="true", then a sort on this field will cause documents
+         without the field to come after documents with the field,
+         regardless of the requested sort order (asc or desc).
+       - If sortMissingFirst="true", then a sort on this field will cause documents
+         without the field to come before documents with the field,
+         regardless of the requested sort order.
+       - If sortMissingLast="false" and sortMissingFirst="false" (the default),
+         then default lucene sorting will be used which places docs without the
+         field first in an ascending sort and last in a descending sort.
+    -->    
+
+    <!--
+      Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
+    -->
+    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
+
+    <!--
+     Numeric field types that index each value at various levels of precision
+     to accelerate range queries when the number of values between the range
+     endpoints is large. See the javadoc for NumericRangeQuery for internal
+     implementation details.
+
+     Smaller precisionStep values (specified in bits) will lead to more tokens
+     indexed per value, slightly larger index size, and faster range queries.
+     A precisionStep of 0 disables indexing at different precision levels.
+    -->
+    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+
+    <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
+         is a more restricted form of the canonical representation of dateTime
+         http://www.w3.org/TR/xmlschema-2/#dateTime    
+         The trailing "Z" designates UTC time and is mandatory.
+         Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
+         All other components are mandatory.
+
+         Expressions can also be used to denote calculations that should be
+         performed relative to "NOW" to determine the value, ie...
+
+               NOW/HOUR
+                  ... Round to the start of the current hour
+               NOW-1DAY
+                  ... Exactly 1 day prior to now
+               NOW/DAY+6MONTHS+3DAYS
+                  ... 6 months and 3 days in the future from the start of
+                      the current day
+                      
+         Consult the DateField javadocs for more information.
+
+         Note: For faster range queries, consider the tdate type
+      -->
+    <fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
+
+    <!-- A Trie based date field for faster date range queries and date faceting. -->
+    <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
+
+    <!-- Test of new facet type that would support case-insensitive faceting -->
+    <fieldType name="facet" class="solr.TextField" sortMissingLast="true" omitNorms="true">
+      <analyzer>
+        <!-- KeywordTokenizer does no actual tokenizing, so the entire                                                                                                                                              
+             input string is preserved as a single token 
+          -->
+        <tokenizer class="solr.KeywordTokenizerFactory"/>
+        <!-- The LowerCase TokenFilter does what you expect, which can be useful
+            when you want your faceting to be case insensitive
+          -->
+        <filter class="solr.LowerCaseFilterFactory" />
+        <!-- The TrimFilter removes any leading or trailing whitespace -->
+        <filter class="solr.TrimFilterFactory" />
+      </analyzer>
+    </fieldType>
+
+    <!--
+      Note:
+      These should only be used for compatibility with existing indexes (created with older Solr versions)
+      or if "sortMissingFirst" or "sortMissingLast" functionality is needed. Use Trie based fields instead.
+
+      Plain numeric field types that store and index the text
+      value verbatim (and hence don't support range queries, since the
+      lexicographic ordering isn't equal to the numeric ordering)
+    -->
+    <fieldType name="pint" class="solr.IntField" omitNorms="true"/>
+    <fieldType name="plong" class="solr.LongField" omitNorms="true"/>
+    <fieldType name="pfloat" class="solr.FloatField" omitNorms="true"/>
+    <fieldType name="pdouble" class="solr.DoubleField" omitNorms="true"/>
+    <fieldType name="pdate" class="solr.DateField" sortMissingLast="true" omitNorms="true"/>
+
+
+    <!--
+      Note:
+      These should only be used for compatibility with existing indexes (created with older Solr versions)
+      or if "sortMissingFirst" or "sortMissingLast" functionality is needed. Use Trie based fields instead.
+
+      Numeric field types that manipulate the value into
+      a string value that isn't human-readable in its internal form,
+      but with a lexicographic ordering the same as the numeric ordering,
+      so that range queries work correctly.
+    -->
+    <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
+    <fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
+    <fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
+    <fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>
+
+
+    <!-- The "RandomSortField" is not used to store or search any
+         data.  You can declare fields of this type in your schema
+         to generate pseudo-random orderings of your docs for sorting
+         purposes.  The ordering is generated based on the field name
+         and the version of the index. As long as the index version
+         remains unchanged, and the same field name is reused,
+         the ordering of the docs will be consistent.
+         If you want different pseudo-random orderings of documents,
+         for the same version of the index, use a dynamicField and
+         change the name
+     -->
+    <fieldType name="random" class="solr.RandomSortField" indexed="true" />
+
+    <!-- solr.TextField allows the specification of custom text analyzers
+         specified as a tokenizer and a list of token filters. Different
+         analyzers may be specified for indexing and querying.
+
+         The optional positionIncrementGap puts space between multiple fields of
+         this type on the same document, with the purpose of preventing false phrase
+         matching across fields.
+
+         For more info on customizing your analyzer chain, please see
+         http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
+     -->
+
+    <!-- One can also specify an existing Analyzer class that has a
+         default constructor via the class attribute on the analyzer element
+    <fieldType name="text_greek" class="solr.TextField">
+      <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
+    </fieldType>
+    -->
+
+    <!-- A text field that only splits on whitespace for exact matching of words -->
+    <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
+        words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
+        so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
+        Synonyms and stopwords are customized by external files, and stemming is enabled.
+        -->
+    <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <!-- in this example, we will only use synonyms at query time
+        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+        -->
+        <!-- Case insensitive stop word removal.
+          add enablePositionIncrements=true in both the index and query
+          analyzers to leave a 'gap' for more accurate phrase queries.
+        -->
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="stopwords.txt"
+                enablePositionIncrements="true"
+                />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="stopwords.txt"
+                enablePositionIncrements="true"
+                />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
+      </analyzer>
+    </fieldType>
+
+
+    <!-- Less flexible matching, but fewer false matches.  Probably not ideal for product names,
+         but may be good for SKUs.  Can insert dashes in the wrong place and still match. -->
+    <fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
+        <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
+             possible with WordDelimiterFilter in conjunction with stemming. -->
+        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+
+    <!-- A general unstemmed text field - good if one does not know the language of the field -->
+    <fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="stopwords.txt"
+                enablePositionIncrements="true"
+                />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+
+    <!-- A general unstemmed text field that indexes tokens normally and also
+         reversed (via ReversedWildcardFilterFactory), to enable more efficient 
+        leading wildcard queries. -->
+    <fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
+           maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="stopwords.txt"
+                enablePositionIncrements="true"
+                />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <!-- charFilter + WhitespaceTokenizer  -->
+    <!--
+    <fieldType name="textCharNorm" class="solr.TextField" positionIncrementGap="100" >
+      <analyzer>
+        <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+      </analyzer>
+    </fieldType>
+    -->
+
+    <!-- This is an example of using the KeywordTokenizer along
+         with various TokenFilterFactories to produce a sortable field
+         that does not include some properties of the source text
+      -->
+    <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
+      <analyzer>
+        <!-- KeywordTokenizer does no actual tokenizing, so the entire
+             input string is preserved as a single token
+          -->
+        <tokenizer class="solr.KeywordTokenizerFactory"/>
+        <!-- The LowerCase TokenFilter does what you expect, which can be useful
+             when you want your sorting to be case insensitive
+          -->
+        <filter class="solr.LowerCaseFilterFactory" />
+        <!-- The TrimFilter removes any leading or trailing whitespace -->
+        <filter class="solr.TrimFilterFactory" />
+        <!-- The PatternReplaceFilter gives you the flexibility to use
+             Java Regular expression to replace any sequence of characters
+             matching a pattern with an arbitrary replacement string, 
+             which may include back references to portions of the original
+             string matched by the pattern.
+             
+             See the Java Regular Expression documentation for more
+             information on pattern and replacement string syntax.
+             
+             http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html
+          -->
+        <filter class="solr.PatternReplaceFilterFactory"
+                pattern="([^a-z])" replacement="" replace="all"
+        />
+      </analyzer>
+    </fieldType>
+    
+    <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
+      </analyzer>
+    </fieldtype>
+
+    <fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <!--
+        The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
+        a token of "foo|1.4"  would be indexed as "foo" with a payload of 1.4f
+        Attributes of the DelimitedPayloadTokenFilterFactory:
+         "delimiter" - a one character delimiter. Default is | (pipe)
+        "encoder" - how to encode the following value into a payload
+           float -> org.apache.lucene.analysis.payloads.FloatEncoder,
+           integer -> o.a.l.a.p.IntegerEncoder
+           identity -> o.a.l.a.p.IdentityEncoder
+            Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
+         -->
+        <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
+      </analyzer>
+    </fieldtype>
+
+    <!-- lowercases the entire field value, keeping it as a single token.  -->
+    <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.KeywordTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory" />
+      </analyzer>
+    </fieldType>
+
+
+    <!-- since fields of this type are by default not stored or indexed,
+         any data added to them will be ignored outright.  --> 
+    <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" /> 
+
+    <!-- subject -->
+    <fieldType name="subject" class="solr.StrField" positionIncrementGap="100">
+<!--
+      <analyzer>
+        <tokenizer class="solr.PatternTokenizerFactory" pattern=";" />
+        <filter class="solr.StandardFilterFactory" />
+        <filter class="solr.TrimFilterFactory" />
+      </analyzer>
+-->
+    </fieldType>
+
+    <!-- Author type  -->
+    <fieldType name="author" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.PatternTokenizerFactory" pattern=";" />
+        <filter class="solr.StandardFilterFactory" />
+        <filter class="solr.TrimFilterFactory" />
+<!--
+        <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
+-->
+      </analyzer>
+    </fieldType>
+
+
+ </types>
+
+
+ <fields>
+   <!-- Valid attributes for fields:
+     name: mandatory - the name for the field
+     type: mandatory - the name of a previously defined type from the 
+       <types> section
+     indexed: true if this field should be indexed (searchable or sortable)
+     stored: true if this field should be retrievable
+     compressed: [false] if this field should be stored using gzip compression
+       (this will only apply if the field type is compressible; among
+       the standard field types, only TextField and StrField are)
+     multiValued: true if this field may contain multiple values per document
+     omitNorms: (expert) set to true to omit the norms associated with
+       this field (this disables length normalization and index-time
+       boosting for the field, and saves some memory).  Only full-text
+       fields or fields that need an index-time boost need norms.
+     termVectors: [false] set to true to store the term vector for a
+       given field.
+       When using MoreLikeThis, fields used for similarity should be
+       stored for best performance.
+     termPositions: Store position information with the term vector.  
+       This will increase storage costs.
+     termOffsets: Store offset information with the term vector. This 
+       will increase storage costs.
+     default: a value that should be used if no value is specified
+       when adding a document.
+   -->
+
+<!-- -->
+   <field name="id" type="string" indexed="true" stored="true" required="true" /> 
+   <field name="_version_" type="long" indexed="true" stored="true"/>
+
+   <field name="transactionId" type="long" indexed="true" stored="false"/>
+
+   <field name="author"       type="text"   indexed="true" stored="true"  multiValued="true"  omitNorms="true"/>
+   <field name="author_exact" type="string"  indexed="true" stored="false" multiValued="true"  omitNorms="true" docValues="true" />
+   <field name="author-date"  type="text"   indexed="true" stored="true"  multiValued="true" omitNorms="true"/>
+   <field name="author-title" type="text"   indexed="true" stored="true"  multiValued="true" omitNorms="true"/>
+
+   <field name="corporate-date"     type="text" indexed="true" stored="true" omitNorms="true"/>
+   <field name="corporate-location" type="text" indexed="true" stored="true" omitNorms="true"/>
+   <field name="corporate-name"     type="text" indexed="true" stored="true" omitNorms="true"/>
+
+   <field name="callnumber" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+   <field name="citation"   type="text" indexed="true" stored="true" multiValued="true"  omitNorms="true"/>
+
+   <field name="date"        type="text"    indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+   <field name="description" type="text_ws" indexed="true" stored="true" multiValued="true"  omitNorms="true"/>
+
+   <field name="edition" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+
+   <field name="electronic-format-instruction" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+   <field name="electronic-format-type"        type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+   <field name="electronic-note"               type="text" indexed="true" stored="true" multiValued="true"  omitNorms="true"/>
+   <field name="electronic-text"               type="text" indexed="true" stored="true" multiValued="true"  omitNorms="true"/>
+   <field name="electronic-url"                type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+
+   <field name="isbn" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+   <field name="issn" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+   <field name="lccn" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+
+   <field name="medium"       type="text"  indexed="true" stored="true"  multiValued="true" omitNorms="true"/>
+   <field name="medium_exact" type="facet" indexed="true" stored="false" multiValued="true" omitNorms="true"/>
+
+   <field name="meeting-date"     type="text" indexed="true" stored="true" omitNorms="true"/>
+   <field name="meeting-location" type="text" indexed="true" stored="true" omitNorms="true"/>
+   <field name="meeting-name"     type="text" indexed="true" stored="true" omitNorms="true"/>
+
+   <field name="series-title" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+
+   <field name="subject"       type="text_ws" indexed="true" stored="true"  multiValued="true" omitNorms="true" />
+   <field name="subject_exact" type="facet"   indexed="true" stored="false" multiValued="true" omitNorms="true" />
+   <field name="subject-long"  type="text_ws" indexed="true" stored="true"  multiValued="true" omitNorms="true" />
+
+   <field name="system-control-nr" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+
+   <field name="tech-rep-nr" type="text" indexed="true" multiValued="true" stored="true"/>
+
+   <field name="title"                  type="text"   indexed="true" multiValued="true" stored="true"/>
+   <field name="title-complete"         type="text"   indexed="true" multiValued="true" stored="true"/>
+   <field name="title-dates"            type="text"   indexed="true" multiValued="true" stored="true"/>
+   <field name="title-medium"           type="text"   indexed="true" multiValued="true" stored="true"/>
+   <field name="title-number-section"   type="text"   indexed="true" multiValued="true" stored="true"/>
+   <field name="title-remainder"        type="text"   indexed="true" multiValued="true" stored="true"/>
+   <field name="title-responsibility"   type="text"   indexed="true" multiValued="true" stored="true"/>
+   <field name="title-uniform"          type="text"   indexed="true" multiValued="true" stored="true"/>
+   <field name="title-uniform-key"      type="text"   indexed="true" multiValued="true" stored="true"/>
+   <field name="title-uniform-media"    type="text"   indexed="true" multiValued="true" stored="true"/>
+   <field name="title-uniform-partname" type="text"   indexed="true" multiValued="true" stored="true"/>
+   <field name="title-uniform-parts"    type="text"   indexed="true" multiValued="true" stored="true"/>
+
+   <field name="journal-title"          type="text" indexed="true" multiValued="true"  stored="true" />
+   <field name="journal-title_exact"    type="text" indexed="true" multiValued="true"  stored="false"/>
+
+   <field name="physical-accomp"     type="text" indexed="true" stored="true" multiValued="true" />
+   <field name="physical-dimensions" type="text" indexed="true" stored="true" multiValued="true" />
+   <field name="physical-extent"     type="text" indexed="true" stored="true" multiValued="true" />
+   <field name="physical-format"     type="text" indexed="true" stored="true" multiValued="true" />
+   <field name="physical-specified"  type="text" indexed="true" stored="true" multiValued="true" />
+   <field name="physical-unitsize"   type="text" indexed="true" stored="true" multiValued="true" />
+   <field name="physical-unittype"   type="text" indexed="true" stored="true" multiValued="true" />
+
+   <field name="publication-date"  type="text" indexed="true" stored="true" multiValued="true" />
+   <field name="publication-name"  type="text" indexed="true" stored="true" multiValued="true" />
+   <field name="publication-place" type="text" indexed="true" stored="true" multiValued="true" />
+
+
+   <!-- Common metadata fields, named specifically to match up with
+     SolrCell metadata when parsing rich documents such as Word, PDF.
+     Some fields are multiValued only because Tika currently may return
+     multiple values for them.
+   -->
+<!--
+   <field name="title" type="text" indexed="true" stored="true" multiValued="true"/>
+   <field name="subject" type="text" indexed="true" stored="true"/>
+   <field name="description" type="text" indexed="true" stored="true" multiValued="true"/>
+   <field name="comments" type="text" indexed="true" stored="true"/>
+   <field name="author" type="textgen" indexed="true" stored="true"/>
+   <field name="keywords" type="textgen" indexed="true" stored="true"/>
+   <field name="category" type="textgen" indexed="true" stored="true"/>
+   <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
+   <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
+-->
+   <field name="harvest-timestamp" type="date"   indexed="true" stored="true"/>
+   <field name="harvest-date"      type="string" indexed="true" stored="true"/>
+
+   <!-- catchall field, containing all other searchable text fields (implemented
+        via copyField further on in this schema) -->
+   <field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
+
+   <!-- catchall text field that indexes tokens both normally and in reverse for efficient
+        leading wildcard queries. -->
+   <field name="text_rev" type="text_rev" indexed="true" stored="false" multiValued="true"/>
+
+   <field name="payloads" type="payloads" indexed="true" stored="true"/>
+
+   <!-- Uncommenting the following will create a "timestamp" field using
+        a default value of "NOW" to indicate when each document was indexed.
+     -->
+   <!--
+   <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
+     -->
+   
+
+   <!-- Dynamic field definitions.  If a field name is not found, dynamicFields
+        will be used if the name matches any of the patterns.
+        RESTRICTION: the glob-like pattern in the name attribute must have
+        a "*" only at the start or the end.
+        EXAMPLE:  name="*_i" will match any field ending in _i (like myid_i, z_i)
+        Longer patterns will be matched first.  If equal-size patterns
+        both match, the first appearing in the schema will be used.  -->
+   <dynamicField name="*_i"  type="int"    indexed="true"  stored="true"/>
+   <dynamicField name="*_s"  type="string"  indexed="true"  stored="true"/>
+   <dynamicField name="*_l"  type="long"   indexed="true"  stored="true"/>
+   <dynamicField name="*_t"  type="text"    indexed="true"  stored="true"/>
+   <dynamicField name="*_b"  type="boolean" indexed="true"  stored="true"/>
+   <dynamicField name="*_f"  type="float"  indexed="true"  stored="true"/>
+   <dynamicField name="*_d"  type="double" indexed="true"  stored="true"/>
+   <dynamicField name="*_dt" type="date"    indexed="true"  stored="true"/>
+
+   <!-- some trie-coded dynamic fields for faster range queries -->
+   <dynamicField name="*_ti" type="tint"    indexed="true"  stored="true"/>
+   <dynamicField name="*_tl" type="tlong"   indexed="true"  stored="true"/>
+   <dynamicField name="*_tf" type="tfloat"  indexed="true"  stored="true"/>
+   <dynamicField name="*_td" type="tdouble" indexed="true"  stored="true"/>
+   <dynamicField name="*_tdt" type="tdate"  indexed="true"  stored="true"/>
+
+   <dynamicField name="*_pi"  type="pint"    indexed="true"  stored="true"/>
+
+   <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
+   <dynamicField name="attr_*" type="textgen" indexed="true" stored="true" multiValued="true"/>
+
+   <dynamicField name="random_*" type="random" />
+
+   <!-- Catch-all: any field that doesn't already match an existing field name
+        or dynamic field is handled by the following definition, rather than
+        being reported as an error.  It currently uses type="text"; change it to
+        type="ignored" if unknown fields should be ignored instead. -->
+   <dynamicField name="*" type="text" multiValued="true" />
+   
+ </fields>
+
+ <!-- Field to use to determine and enforce document uniqueness. 
+      Unless this field is marked with required="false", it will be a required field
+   -->
+ <uniqueKey>id</uniqueKey>
+
+ <!-- field for the QueryParser to use when an explicit fieldname is absent -->
+ <defaultSearchField>text</defaultSearchField>
+
+ <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
+ <solrQueryParser defaultOperator="AND"/>
+
+  <!-- copyField commands copy one field to another at the time a document
+        is added to the index.  It's used either to index the same field differently,
+        or to add multiple fields to the same field for easier/faster searching.  -->
+
+   <copyField source="author"  dest="author_exact"/>
+   <copyField source="subject" dest="subject_exact"/>
+   <copyField source="medium"  dest="medium_exact"/>
+   <copyField source="journal-title"   dest="journal-title_exact"/>
+       
+   <!-- Above, individual source fields are copied to their "_exact" counterparts.
+         To map multiple source fields to the same destination field,
+         use the dynamic field (glob) syntax, as done below for the [text] field.
+         copyField also supports a maxChars to copy setting.  -->
+          
+   <copyField source="*" dest="text" maxChars="10000"/> 
+
+   <!-- copy name to alphaNameSort, a field designed for sorting by name -->
+   <!-- <copyField source="name" dest="alphaNameSort"/> -->
+ 
+
+ <!-- Similarity is the scoring routine for each document vs. a query.
+      A custom similarity may be specified here, but the default is fine
+      for most applications.  -->
+ <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
+ <!-- ... OR ...
+      Specify a SimilarityFactory class name implementation
+      allowing parameters to be used.
+ -->
+ <!--
+ <similarity class="com.example.solr.CustomSimilarityFactory">
+   <str name="paramkey">param value</str>
+ </similarity>
+ -->
+</schema>
diff --git a/zookeeper/solr/collection1/conf/scripts.conf b/zookeeper/solr/collection1/conf/scripts.conf

new file mode 100644 (file)

index 0000000..f58b262
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/scripts.conf
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+user=
+solr_hostname=localhost
+solr_port=8983
+rsyncd_port=18983
+data_dir=
+webapp_name=solr
+master_host=
+master_data_dir=
+master_status_dir=
diff --git a/zookeeper/solr/collection1/conf/solrconfig.xml b/zookeeper/solr/collection1/conf/solrconfig.xml

new file mode 100644 (file)

index 0000000..156314d
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/solrconfig.xml
@@ -0,0 +1,1823 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- 
+     For more details about configurations options that may appear in
+     this file, see http://wiki.apache.org/solr/SolrConfigXml. 
+-->
+<config>
+  <!-- In all configuration below, a prefix of "solr." for class names
+       is an alias that causes solr to search appropriate packages,
+       including org.apache.solr.(search|update|request|core|analysis)
+
+       You may also specify a fully qualified Java classname if you
+       have your own custom plugins.
+    -->
+
+  <!-- Controls what version of Lucene various components of Solr
+       adhere to.  Generally, you want to use the latest version to
+       get all bug fixes and improvements. It is highly recommended
+       that you fully re-index after changing this setting as it can
+       affect both how text is indexed and queried.
+  -->
+  <luceneMatchVersion>4.4</luceneMatchVersion>
+
+  <!-- <lib/> directives can be used to instruct Solr to load any Jars
+       identified and use them to resolve any "plugins" specified in
+       your solrconfig.xml or schema.xml (ie: Analyzers, Request
+       Handlers, etc...).
+
+       All directories and paths are resolved relative to the
+       instanceDir.
+
+       Please note that <lib/> directives are processed in the order
+       that they appear in your solrconfig.xml file, and are "stacked" 
+       on top of each other when building a ClassLoader - so if you have 
+       plugin jars with dependencies on other jars, the "lower level" 
+       dependency jars should be loaded first.
+
+       If a "./lib" directory exists in your instanceDir, all files
+       found in it are included as if you had used the following
+       syntax...
+       
+              <lib dir="./lib" />
+    -->
+
+  <!-- A 'dir' option by itself adds any files found in the directory 
+       to the classpath, this is useful for including all jars in a
+       directory.
+
+       When a 'regex' is specified in addition to a 'dir', only the
+       files in that directory which completely match the regex
+       (anchored on both ends) will be included.
+
+       If a 'dir' option (with or without a regex) is used and nothing
+       is found that matches, a warning will be logged.
+
+       The examples below can be used to load some solr-contribs along 
+       with their external dependencies.
+    -->
+  <lib dir="../../../contrib/extraction/lib" regex=".*\.jar" />
+  <lib dir="../../../dist/" regex="solr-cell-\d.*\.jar" />
+
+  <lib dir="../../../contrib/clustering/lib/" regex=".*\.jar" />
+  <lib dir="../../../dist/" regex="solr-clustering-\d.*\.jar" />
+
+  <lib dir="../../../contrib/langid/lib/" regex=".*\.jar" />
+  <lib dir="../../../dist/" regex="solr-langid-\d.*\.jar" />
+
+  <lib dir="../../../contrib/velocity/lib" regex=".*\.jar" />
+  <lib dir="../../../dist/" regex="solr-velocity-\d.*\.jar" />
+
+  <!-- an exact 'path' can be used instead of a 'dir' to specify a 
+       specific jar file.  This will cause a serious error to be logged 
+       if it can't be loaded.
+    -->
+  <!--
+     <lib path="../a-jar-that-does-not-exist.jar" /> 
+  -->
+  
+  <!-- Data Directory
+
+       Used to specify an alternate directory to hold all index data
+       other than the default ./data under the Solr home.  If
+       replication is in use, this should match the replication
+       configuration.
+    -->
+  <dataDir>${solr.data.dir:}</dataDir>
+
+
+  <!-- The DirectoryFactory to use for indexes.
+       
+       solr.StandardDirectoryFactory is filesystem
+       based and tries to pick the best implementation for the current
+       JVM and platform.  solr.NRTCachingDirectoryFactory, the default,
+       wraps solr.StandardDirectoryFactory and caches small files in memory
+       for better NRT performance.
+
+       One can force a particular implementation via solr.MMapDirectoryFactory,
+       solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory.
+
+       solr.RAMDirectoryFactory is memory based, not
+       persistent, and doesn't work with replication.
+    -->
+  <directoryFactory name="DirectoryFactory" 
+                    class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/> 
+
+  <!-- The CodecFactory for defining the format of the inverted index.
+       The default implementation is SchemaCodecFactory, which is the official Lucene
+       index format, but hooks into the schema to provide per-field customization of
+       the postings lists and per-document values in the fieldType element
+       (postingsFormat/docValuesFormat). Note that most of the alternative implementations
+       are experimental, so if you choose to customize the index format, it's a good
+       idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader)
+       before upgrading to a newer version to avoid unnecessary reindexing.
+  -->
+  <codecFactory class="solr.SchemaCodecFactory"/>
+
+  <!-- To enable dynamic schema REST APIs, use the following for <schemaFactory>:
+  
+       <schemaFactory class="ManagedIndexSchemaFactory">
+         <bool name="mutable">true</bool>
+         <str name="managedSchemaResourceName">managed-schema</str>
+       </schemaFactory>
+       
+       When ManagedIndexSchemaFactory is specified, Solr will load the schema from
+       the resource named in 'managedSchemaResourceName', rather than from schema.xml.
+       Note that the managed schema resource CANNOT be named schema.xml.  If the managed
+       schema does not exist, Solr will create it after reading schema.xml, then rename
+       'schema.xml' to 'schema.xml.bak'. 
+       
+       Do NOT hand edit the managed schema - external modifications will be ignored and
+       overwritten as a result of schema modification REST API calls.
+
+       When ManagedIndexSchemaFactory is specified with mutable = true, schema
+       modification REST API calls will be allowed; otherwise, error responses will be
+       sent back for these requests. 
+  -->
+  <schemaFactory class="ClassicIndexSchemaFactory"/>
+
+  <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+       Index Config - These settings control low-level behavior of indexing
+       Most example settings here show the default value, but are commented
+       out, to more easily see where customizations have been made.
+       
+       Note: This replaces <indexDefaults> and <mainIndex> from older versions
+       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
+  <indexConfig>
+    <!-- maxFieldLength was removed in 4.0. To get similar behavior, include a 
+         LimitTokenCountFilterFactory in your fieldType definition. E.g. 
+     <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/>
+    -->
+    <!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 -->
+    <!-- <writeLockTimeout>1000</writeLockTimeout>  -->
+
+    <!-- The maximum number of simultaneous threads that may be
+         indexing documents at once in IndexWriter; if more than this
+         many threads arrive they will wait for others to finish.
+         Default in Solr/Lucene is 8. -->
+    <!-- <maxIndexingThreads>8</maxIndexingThreads>  -->
+
+    <!-- Expert: Enabling compound file will use fewer files for the index,
+         and thus fewer file descriptors, at the expense of decreased performance.
+         Default in Lucene is "true". Default in Solr is "false" (since 3.6) -->
+    <!-- <useCompoundFile>false</useCompoundFile> -->
+
+    <!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene
+         indexing for buffering added documents and deletions before they are
+         flushed to the Directory.
+         maxBufferedDocs sets a limit on the number of documents buffered
+         before flushing.
+         If both ramBufferSizeMB and maxBufferedDocs are set, then
+         Lucene will flush based on whichever limit is hit first.
+         The default is 100 MB.  -->
+    <!-- <ramBufferSizeMB>100</ramBufferSizeMB> -->
+    <!-- <maxBufferedDocs>1000</maxBufferedDocs> -->
+
+    <!-- Expert: Merge Policy 
+         The Merge Policy in Lucene controls how merging of segments is done.
+         The default since Solr/Lucene 3.3 is TieredMergePolicy.
+         The default since Lucene 2.3 was the LogByteSizeMergePolicy,
+         Even older versions of Lucene used LogDocMergePolicy.
+      -->
+    <!--
+        <mergePolicy class="org.apache.lucene.index.TieredMergePolicy">
+          <int name="maxMergeAtOnce">10</int>
+          <int name="segmentsPerTier">10</int>
+        </mergePolicy>
+      -->
+       
+    <!-- Merge Factor
+         The merge factor controls how many segments will get merged at a time.
+         For TieredMergePolicy, mergeFactor is a convenience parameter which
+         will set both MaxMergeAtOnce and SegmentsPerTier at once.
+         For LogByteSizeMergePolicy, mergeFactor decides how many new segments
+         will be allowed before they are merged into one.
+         Default is 10 for both merge policies.
+      -->
+    <!-- 
+    <mergeFactor>10</mergeFactor>
+      -->
+
+    <!-- Expert: Merge Scheduler
+         The Merge Scheduler in Lucene controls how merges are
+         performed.  The ConcurrentMergeScheduler (Lucene 2.3 default)
+         can perform merges in the background using separate threads.
+         The SerialMergeScheduler (Lucene 2.2 default) does not.
+     -->
+    <!-- 
+       <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>
+       -->
+
+    <!-- LockFactory 
+
+         This option specifies which Lucene LockFactory implementation
+         to use.
+      
+         single = SingleInstanceLockFactory - suggested for a
+                  read-only index or when there is no possibility of
+                  another process trying to modify the index.
+         native = NativeFSLockFactory - uses OS native file locking.
+                  Do not use when multiple solr webapps in the same
+                  JVM are attempting to share a single index.
+         simple = SimpleFSLockFactory  - uses a plain file for locking
+
+         Defaults: 'native' is default for Solr3.6 and later, otherwise
+                   'simple' is the default
+
+         More details on the nuances of each LockFactory...
+         http://wiki.apache.org/lucene-java/AvailableLockFactories
+    -->
+    <lockType>${solr.lock.type:native}</lockType>
+
+    <!-- Unlock On Startup
+
+         If true, unlock any held write or commit locks on startup.
+         This defeats the locking mechanism that allows multiple
+         processes to safely access a lucene index, and should be used
+         with care. Default is "false".
+
+         This is not needed if lock type is 'single'
+     -->
+    <!--
+    <unlockOnStartup>false</unlockOnStartup>
+      -->
+    
+    <!-- Expert: Controls how often Lucene loads terms into memory
+         Default is 128 and is likely good for most everyone.
+      -->
+    <!-- <termIndexInterval>128</termIndexInterval> -->
+
+    <!-- If true, IndexReaders will be reopened (often more efficient)
+         instead of closed and then opened. Default: true
+      -->
+    <!-- 
+    <reopenReaders>true</reopenReaders>
+      -->
+
+    <!-- Commit Deletion Policy
+         Custom deletion policies can be specified here. The class must
+         implement org.apache.lucene.index.IndexDeletionPolicy.
+
+         The default Solr IndexDeletionPolicy implementation supports
+         deleting index commit points on number of commits, age of
+         commit point and optimized status.
+         
+         The latest commit point should always be preserved regardless
+         of the criteria.
+    -->
+    <!-- 
+    <deletionPolicy class="solr.SolrDeletionPolicy">
+    -->
+      <!-- The number of commit points to be kept -->
+      <!-- <str name="maxCommitsToKeep">1</str> -->
+      <!-- The number of optimized commit points to be kept -->
+      <!-- <str name="maxOptimizedCommitsToKeep">0</str> -->
+      <!--
+          Delete all commit points once they have reached the given age.
+          Supports DateMathParser syntax e.g.
+        -->
+      <!--
+         <str name="maxCommitAge">30MINUTES</str>
+         <str name="maxCommitAge">1DAY</str>
+      -->
+    <!-- 
+    </deletionPolicy>
+    -->
+
+    <!-- Lucene Infostream
+       
+         To aid in advanced debugging, Lucene provides an "InfoStream"
+         of detailed information when indexing.
+
+         Setting the value to true will instruct the underlying Lucene
+         IndexWriter to write its info stream to solr's log. By default,
+         this is enabled here, and controlled through log4j.properties.
+      -->
+     <infoStream>true</infoStream>
+  </indexConfig>
+
+
+  <!-- JMX
+       
+       This example enables JMX if and only if an existing MBeanServer
+       is found, use this if you want to configure JMX through JVM
+       parameters. Remove this to disable exposing Solr configuration
+       and statistics to JMX.
+
+       For more details see http://wiki.apache.org/solr/SolrJmx
+    -->
+  <jmx />
+  <!-- If you want to connect to a particular server, specify the
+       agentId 
+    -->
+  <!-- <jmx agentId="myAgent" /> -->
+  <!-- If you want to start a new MBeanServer, specify the serviceUrl -->
+  <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
+    -->
+
+  <!-- The default high-performance update handler -->
+  <updateHandler class="solr.DirectUpdateHandler2">
+
+    <!-- Enables a transaction log, used for real-time get, durability, and
+         solr cloud replica recovery.  The log can grow as big as
+         uncommitted changes to the index, so use of a hard autoCommit
+         is recommended (see below).
+         "dir" - the target directory for transaction logs, defaults to the
+                solr data directory.  --> 
+    <updateLog>
+      <str name="dir">${solr.ulog.dir:}</str>
+    </updateLog>
+ 
+    <!-- AutoCommit
+
+         Perform a hard commit automatically under certain conditions.
+         Instead of enabling autoCommit, consider using "commitWithin"
+         when adding documents. 
+
+         http://wiki.apache.org/solr/UpdateXmlMessages
+
+         maxDocs - Maximum number of documents to add since the last
+                   commit before automatically triggering a new commit.
+
+         maxTime - Maximum amount of time in ms that is allowed to pass
+                   since a document was added before automatically
+                   triggering a new commit. 
+         openSearcher - if false, the commit causes recent index changes
+           to be flushed to stable storage, but does not cause a new
+           searcher to be opened to make those changes visible.
+
+         If the updateLog is enabled, then it's highly recommended to
+         have some sort of hard autoCommit to limit the log size.
+      -->
+     <autoCommit> 
+       <maxTime>${solr.autoCommit.maxTime:15000}</maxTime> 
+       <openSearcher>false</openSearcher> 
+     </autoCommit>
+
+    <!-- autoSoftCommit is like autoCommit except it causes a
+         'soft' commit which only ensures that changes are visible
+         but does not ensure that data is synced to disk.  This is
+         faster and more near-realtime friendly than a hard commit.
+      -->
+
+     <autoSoftCommit> 
+       <maxTime>${solr.autoSoftCommit.maxTime:-1}</maxTime> 
+     </autoSoftCommit>
+
+    <!-- Update Related Event Listeners
+         
+         Various IndexWriter related events can trigger Listeners to
+         take actions.
+
+         postCommit - fired after every commit or optimize command
+         postOptimize - fired after every optimize command
+      -->
+    <!-- The RunExecutableListener executes an external command from a
+         hook such as postCommit or postOptimize.
+         
+         exe - the name of the executable to run
+         dir - dir to use as the current working directory. (default=".")
+         wait - the calling thread waits until the executable returns. 
+                (default="true")
+         args - the arguments to pass to the program.  (default is none)
+         env - environment variables to set.  (default is none)
+      -->
+    <!-- This example shows how RunExecutableListener could be used
+         with the script based replication...
+         http://wiki.apache.org/solr/CollectionDistribution
+      -->
+    <!--
+       <listener event="postCommit" class="solr.RunExecutableListener">
+         <str name="exe">solr/bin/snapshooter</str>
+         <str name="dir">.</str>
+         <bool name="wait">true</bool>
+         <arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
+         <arr name="env"> <str>MYVAR=val1</str> </arr>
+       </listener>
+      -->
+
+  </updateHandler>
+  
+  <!-- IndexReaderFactory
+
+       Use the following format to specify a custom IndexReaderFactory,
+       which allows for alternate IndexReader implementations.
+
+       ** Experimental Feature **
+
+       Please note - Using a custom IndexReaderFactory may prevent
+       certain other features from working. The API to
+       IndexReaderFactory may change without warning or may even be
+       removed from future releases if the problems cannot be
+       resolved.
+
+
+       ** Features that may not work with custom IndexReaderFactory **
+
+       The ReplicationHandler assumes a disk-resident index. Using a
+       custom IndexReader implementation may cause incompatibility
+       with ReplicationHandler and may cause replication to not work
+       correctly. See SOLR-1366 for details.
+
+    -->
+  <!--
+  <indexReaderFactory name="IndexReaderFactory" class="package.class">
+    <str name="someArg">Some Value</str>
+  </indexReaderFactory >
+  -->
+  <!-- By explicitly declaring the Factory, the termIndexDivisor can
+       be specified.
+    -->
+  <!--
+     <indexReaderFactory name="IndexReaderFactory" 
+                         class="solr.StandardIndexReaderFactory">
+       <int name="setTermIndexDivisor">12</int>
+     </indexReaderFactory >
+    -->
+
+  <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+       Query section - these settings control query time things like caches
+       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
+  <query>
+    <!-- Max Boolean Clauses
+
+         Maximum number of clauses in each BooleanQuery,  an exception
+         is thrown if exceeded.
+
+         ** WARNING **
+         
+         This option actually modifies a global Lucene property that
+         will affect all SolrCores.  If multiple solrconfig.xml files
+         disagree on this property, the value at any given moment will
+         be based on the last SolrCore to be initialized.
+         
+      -->
+    <maxBooleanClauses>1024</maxBooleanClauses>
+
+
+    <!-- Solr Internal Query Caches
+
+         There are two implementations of cache available for Solr,
+         LRUCache, based on a synchronized LinkedHashMap, and
+         FastLRUCache, based on a ConcurrentHashMap.  
+
+         FastLRUCache has faster gets and slower puts in single
+         threaded operation and thus is generally faster than LRUCache
+         when the hit ratio of the cache is high (> 75%), and may be
+         faster under other scenarios on multi-cpu systems.
+    -->
+
+    <!-- Filter Cache
+
+         Cache used by SolrIndexSearcher for filters (DocSets),
+         unordered sets of *all* documents that match a query.  When a
+         new searcher is opened, its caches may be prepopulated or
+         "autowarmed" using data from caches in the old searcher.
+         autowarmCount is the number of items to prepopulate.  For
+         LRUCache, the autowarmed items will be the most recently
+         accessed items.
+
+         Parameters:
+           class - the SolrCache implementation
+               (LRUCache or FastLRUCache)
+           size - the maximum number of entries in the cache
+           initialSize - the initial capacity (number of entries) of
+               the cache.  (see java.util.HashMap)
+           autowarmCount - the number of entries to prepopulate from
+               an old cache.
+      -->
+    <filterCache class="solr.FastLRUCache"
+                 size="512"
+                 initialSize="512"
+                 autowarmCount="0"/>
+
+    <!-- Query Result Cache
+         
+         Caches results of searches - ordered lists of document ids
+         (DocList) based on a query, a sort, and the range of documents requested.  
+      -->
+    <queryResultCache class="solr.LRUCache"
+                     size="512"
+                     initialSize="512"
+                     autowarmCount="0"/>
+   
+    <!-- Document Cache
+
+         Caches Lucene Document objects (the stored fields for each
+         document).  Since Lucene internal document ids are transient,
+         this cache will not be autowarmed.  
+      -->
+    <documentCache class="solr.LRUCache"
+                   size="512"
+                   initialSize="512"
+                   autowarmCount="0"/>
+    
+    <!-- Field Value Cache
+         
+         Cache used to hold field values that are quickly accessible
+         by document id.  The fieldValueCache is created by default
+         even if not configured here.
+      -->
+    <!--
+       <fieldValueCache class="solr.FastLRUCache"
+                        size="512"
+                        autowarmCount="128"
+                        showItems="32" />
+      -->
+
+    <!-- Custom Cache
+
+         Example of a generic cache.  These caches may be accessed by
+         name through SolrIndexSearcher.getCache(), cacheLookup(), and
+         cacheInsert().  The purpose is to enable easy caching of
+         user/application level data.  The regenerator argument should
+         be specified as an implementation of solr.CacheRegenerator 
+         if autowarming is desired.  
+      -->
+    <!--
+       <cache name="myUserCache"
+              class="solr.LRUCache"
+              size="4096"
+              initialSize="1024"
+              autowarmCount="1024"
+              regenerator="com.mycompany.MyRegenerator"
+              />
+      -->
+
+
+    <!-- Lazy Field Loading
+
+         If true, stored fields that are not requested will be loaded
+         lazily.  This can result in a significant speed improvement
+         if the usual case is to not load all stored fields,
+         especially if the skipped fields are large compressed text
+         fields.
+    -->
+    <enableLazyFieldLoading>true</enableLazyFieldLoading>
+
+   <!-- Use Filter For Sorted Query
+
+        A possible optimization that attempts to use a filter to
+        satisfy a search.  If the requested sort does not include
+        score, then the filterCache will be checked for a filter
+        matching the query. If found, the filter will be used as the
+        source of document ids, and then the sort will be applied to
+        that.
+
+        For most situations, this will not be useful unless you
+        frequently get the same search repeatedly with different sort
+        options, and none of them ever use "score"
+     -->
+   <!--
+      <useFilterForSortedQuery>true</useFilterForSortedQuery>
+     -->
+
+   <!-- Result Window Size
+
+        An optimization for use with the queryResultCache.  When a search
+        is requested, a superset of the requested number of document ids
+        is collected.  For example, if a search for a particular query
+        requests matching documents 10 through 19, and queryResultWindowSize is 50,
+        then documents 0 through 49 will be collected and cached.  Any further
+        requests in that range can be satisfied via the cache.  
+     -->
+   <queryResultWindowSize>20</queryResultWindowSize>
+
+   <!-- Maximum number of documents to cache for any entry in the
+        queryResultCache. 
+     -->
+   <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
+
+   <!-- Query Related Event Listeners
+
+        Various IndexSearcher related events can trigger Listeners to
+        take actions.
+
+        newSearcher - fired whenever a new searcher is being prepared
+        and there is a current searcher handling requests (aka
+        registered).  It can be used to prime certain caches to
+        prevent long request times for certain requests.
+
+        firstSearcher - fired whenever a new searcher is being
+        prepared but there is no current registered searcher to handle
+        requests or to gain autowarming data from.
+
+        
+     -->
+    <!-- QuerySenderListener takes an array of NamedList and executes a
+         local query request for each NamedList in sequence. 
+      -->
+    <listener event="newSearcher" class="solr.QuerySenderListener">
+      <arr name="queries">
+        <!--
+           <lst><str name="q">solr</str><str name="sort">price asc</str></lst>
+           <lst><str name="q">rocks</str><str name="sort">weight asc</str></lst>
+          -->
+      </arr>
+    </listener>
+    <listener event="firstSearcher" class="solr.QuerySenderListener">
+      <arr name="queries">
+        <lst>
+          <str name="q">static firstSearcher warming in solrconfig.xml</str>
+        </lst>
+      </arr>
+    </listener>
+
+    <!-- Use Cold Searcher
+
+         If a search request comes in and there is no current
+         registered searcher, then immediately register the still
+         warming searcher and use it.  If "false" then all requests
+         will block until the first searcher is done warming.
+      -->
+    <useColdSearcher>false</useColdSearcher>
+
+    <!-- Max Warming Searchers
+         
+         Maximum number of searchers that may be warming in the
+         background concurrently.  An error is returned if this limit
+         is exceeded.
+
+         Recommended values are 1-2 for read-only slaves, higher for
+         masters w/o cache warming.
+      -->
+    <maxWarmingSearchers>2</maxWarmingSearchers>
+
+  </query>
+
+
+  <!-- Request Dispatcher
+
+       This section contains instructions for how the SolrDispatchFilter
+       should behave when processing requests for this SolrCore.
+
+       handleSelect is a legacy option that affects the behavior of requests
+       such as /select?qt=XXX
+
+       handleSelect="true" will cause the SolrDispatchFilter to process
+       the request and dispatch the query to a handler specified by the 
+       "qt" param, assuming "/select" isn't already registered.
+
+       handleSelect="false" will cause the SolrDispatchFilter to
+       ignore "/select" requests, resulting in a 404 unless a handler
+       is explicitly registered with the name "/select"
+
+       handleSelect="true" is not recommended for new users, but is the default
+       for backwards compatibility
+    -->
+  <requestDispatcher handleSelect="false" >
+    <!-- Request Parsing
+
+         These settings indicate how Solr Requests may be parsed, and
+         what restrictions may be placed on the ContentStreams from
+         those requests
+
+         enableRemoteStreaming - enables use of the stream.file
+         and stream.url parameters for specifying remote streams.
+
+         multipartUploadLimitInKB - specifies the max size (in KiB) of
+         Multipart File Uploads that Solr will allow in a Request.
+         
+         formdataUploadLimitInKB - specifies the max size (in KiB) of
+         form data (application/x-www-form-urlencoded) sent via
+         POST. You can use POST to pass request parameters not
+         fitting into the URL.
+         
+         addHttpRequestToContext - if set to true, it will instruct
+         the requestParsers to include the original HttpServletRequest
+         object in the context map of the SolrQueryRequest under the 
+         key "httpRequest". It will not be used by any of the existing
+         Solr components, but may be useful when developing custom 
+         plugins.
+         
+         *** WARNING ***
+         The settings below authorize Solr to fetch remote files. You
+         should make sure your system has some authentication before
+         using enableRemoteStreaming="true"
+
+      --> 
+    <requestParsers enableRemoteStreaming="true" 
+                    multipartUploadLimitInKB="2048000"
+                    formdataUploadLimitInKB="2048"
+                    addHttpRequestToContext="false"/>
+
+    <!-- HTTP Caching
+
+         Set HTTP caching related parameters (for proxy caches and clients).
+
+         The options below instruct Solr not to output any HTTP Caching
+         related headers
+      -->
+    <httpCaching never304="true" />
+    <!-- If you include a <cacheControl> directive, it will be used to
+         generate a Cache-Control header (as well as an Expires header
+         if the value contains "max-age=")
+         
+         By default, no Cache-Control header is generated.
+         
+         You can use the <cacheControl> option even if you have set
+         never304="true"
+      -->
+    <!--
+       <httpCaching never304="true" >
+         <cacheControl>max-age=30, public</cacheControl> 
+       </httpCaching>
+      -->
+    <!-- To enable Solr to respond with automatically generated HTTP
+         Caching headers, and to respond to Cache Validation requests
+         correctly, set the value of never304="false"
+         
+         This will cause Solr to generate Last-Modified and ETag
+         headers based on the properties of the Index.
+
+         The following options can also be specified to affect the
+         values of these headers...
+
+         lastModFrom - the default value is "openTime" which means the
+         Last-Modified value (and validation against If-Modified-Since
+         requests) will all be relative to when the current Searcher
+         was opened.  You can change it to lastModFrom="dirLastMod" if
+         you want the value to exactly correspond to when the physical
+         index was last modified.
+
+         etagSeed="..." is an option you can change to force the ETag
+         header (and validation against If-None-Match requests) to be
+         different even if the index has not changed (ie: when making
+         significant changes to your config file)
+
+         (lastModFrom and etagSeed are both ignored if you use
+         the never304="true" option)
+      -->
+    <!--
+       <httpCaching lastModFrom="openTime"
+                    etagSeed="Solr">
+         <cacheControl>max-age=30, public</cacheControl> 
+       </httpCaching>
+      -->
+  </requestDispatcher>
+
+  <!-- Request Handlers 
+
+       http://wiki.apache.org/solr/SolrRequestHandler
+
+       Incoming queries will be dispatched to a specific handler by name
+       based on the path specified in the request.
+
+       Legacy behavior: If the request path uses "/select" but no Request
+       Handler has that name, and if handleSelect="true" has been specified in
+       the requestDispatcher, then the Request Handler is dispatched based on
+       the qt parameter.  Handlers without a leading '/' are accessed this way
+       like so: http://host/app/[core/]select?qt=name  If no qt is
+       given, then the requestHandler that declares default="true" will be
+       used or the one named "standard".
+
+       If a Request Handler is declared with startup="lazy", then it will
+       not be initialized until the first request that uses it.
+
+    -->
+  <!-- SearchHandler
+
+       http://wiki.apache.org/solr/SearchHandler
+
+       For processing Search Queries, the primary Request Handler
+       provided with Solr is "SearchHandler". It delegates to a sequence
+       of SearchComponents (see below) and supports distributed
+       queries across multiple shards
+    -->
+  <requestHandler name="/select" class="solr.SearchHandler">
+    <!-- default values for query parameters can be specified, these
+         will be overridden by parameters in the request
+      -->
+     <lst name="defaults">
+       <str name="echoParams">explicit</str>
+       <int name="rows">10</int>
+       <str name="df">text</str>
+     </lst>
+    <!-- In addition to defaults, "appends" params can be specified
+         to identify values which should be appended to the list of
+         multi-val params from the query (or the existing "defaults").
+      -->
+    <!-- In this example, the param "fq=inStock:true" would be appended to
+         any query time fq params the user may specify, as a mechanism for
+         partitioning the index, independent of any user selected filtering
+         that may also be desired (perhaps as a result of faceted searching).
+
+         NOTE: there is *absolutely* nothing a client can do to prevent these
+         "appends" values from being used, so don't use this mechanism
+         unless you are sure you always want it.
+      -->
+    <!--
+       <lst name="appends">
+         <str name="fq">inStock:true</str>
+       </lst>
+      -->
+    <!-- "invariants" are a way of letting the Solr maintainer lock down
+         the options available to Solr clients.  Any params values
+         specified here are used regardless of what values may be specified
+         in either the query, the "defaults", or the "appends" params.
+
+         In this example, the facet.field and facet.query params would
+         be fixed, limiting the facets clients can use.  Faceting is
+         not turned on by default - but if the client does specify
+         facet=true in the request, these are the only facets they
+         will be able to see counts for; regardless of what other
+         facet.field or facet.query params they may specify.
+
+         NOTE: there is *absolutely* nothing a client can do to prevent these
+         "invariants" values from being used, so don't use this mechanism
+         unless you are sure you always want it.
+      -->
+    <!--
+       <lst name="invariants">
+         <str name="facet.field">cat</str>
+         <str name="facet.field">manu_exact</str>
+         <str name="facet.query">price:[* TO 500]</str>
+         <str name="facet.query">price:[500 TO *]</str>
+       </lst>
+      -->
+    <!-- If the default list of SearchComponents is not desired, that
+         list can either be overridden completely, or components can be
+         prepended or appended to the default list.  (see below)
+      -->
+    <!--
+       <arr name="components">
+         <str>nameOfCustomComponent1</str>
+         <str>nameOfCustomComponent2</str>
+       </arr>
+      -->
+    </requestHandler>
+
+  <!-- A request handler that returns indented JSON by default -->
+  <requestHandler name="/query" class="solr.SearchHandler">
+     <lst name="defaults">
+       <str name="echoParams">explicit</str>
+       <str name="wt">json</str>
+       <str name="indent">true</str>
+       <str name="df">text</str>
+     </lst>
+  </requestHandler>
+
+
+  <!-- realtime get handler, guaranteed to return the latest stored fields of
+       any document, without the need to commit or open a new searcher.  The
+       current implementation relies on the updateLog feature being enabled. -->
+  <requestHandler name="/get" class="solr.RealTimeGetHandler">
+     <lst name="defaults">
+       <str name="omitHeader">true</str>
+       <str name="wt">json</str>
+       <str name="indent">true</str>
+     </lst>
+  </requestHandler>
+
+ 
+  <!-- A Robust Example 
+       
+       This example SearchHandler declaration shows off usage of the
+       SearchHandler with many defaults declared
+
+       Note that multiple instances of the same Request Handler
+       (SearchHandler) can be registered with different names (and
+       different init parameters)
+    -->
+  <requestHandler name="/browse" class="solr.SearchHandler">
+     <lst name="defaults">
+       <str name="echoParams">explicit</str>
+
+       <!-- VelocityResponseWriter settings -->
+       <str name="wt">velocity</str>
+       <str name="v.template">browse</str>
+       <str name="v.layout">layout</str>
+       <str name="title">Solritas</str>
+
+       <!-- Query settings -->
+       <str name="defType">edismax</str>
+       <str name="qf">
+          text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+          title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
+       </str>
+       <str name="df">text</str>
+       <str name="mm">100%</str>
+       <str name="q.alt">*:*</str>
+       <str name="rows">10</str>
+       <str name="fl">*,score</str>
+
+       <str name="mlt.qf">
+         text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+         title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
+       </str>
+       <str name="mlt.fl">text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename</str>
+       <int name="mlt.count">3</int>
+
+       <!-- Faceting defaults -->
+       <str name="facet">on</str>
+       <str name="facet.field">cat</str>
+       <str name="facet.field">manu_exact</str>
+       <str name="facet.field">content_type</str>
+       <str name="facet.field">author_s</str>
+       <str name="facet.query">ipod</str>
+       <str name="facet.query">GB</str>
+       <str name="facet.mincount">1</str>
+       <str name="facet.pivot">cat,inStock</str>
+       <str name="facet.range.other">after</str>
+       <str name="facet.range">price</str>
+       <int name="f.price.facet.range.start">0</int>
+       <int name="f.price.facet.range.end">600</int>
+       <int name="f.price.facet.range.gap">50</int>
+       <str name="facet.range">popularity</str>
+       <int name="f.popularity.facet.range.start">0</int>
+       <int name="f.popularity.facet.range.end">10</int>
+       <int name="f.popularity.facet.range.gap">3</int>
+       <str name="facet.range">manufacturedate_dt</str>
+       <str name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str>
+       <str name="f.manufacturedate_dt.facet.range.end">NOW</str>
+       <str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str>
+       <str name="f.manufacturedate_dt.facet.range.other">before</str>
+       <str name="f.manufacturedate_dt.facet.range.other">after</str>
+
+       <!-- Highlighting defaults -->
+       <str name="hl">on</str>
+       <str name="hl.fl">content features title name</str>
+       <str name="hl.encoder">html</str>
+       <str name="hl.simple.pre">&lt;b&gt;</str>
+       <str name="hl.simple.post">&lt;/b&gt;</str>
+       <str name="f.title.hl.fragsize">0</str>
+       <str name="f.title.hl.alternateField">title</str>
+       <str name="f.name.hl.fragsize">0</str>
+       <str name="f.name.hl.alternateField">name</str>
+       <str name="f.content.hl.snippets">3</str>
+       <str name="f.content.hl.fragsize">200</str>
+       <str name="f.content.hl.alternateField">content</str>
+       <str name="f.content.hl.maxAlternateFieldLength">750</str>
+
+       <!-- Spell checking defaults -->
+       <str name="spellcheck">on</str>
+       <str name="spellcheck.extendedResults">false</str>       
+       <str name="spellcheck.count">5</str>
+       <str name="spellcheck.alternativeTermCount">2</str>
+       <str name="spellcheck.maxResultsForSuggest">5</str>       
+       <str name="spellcheck.collate">true</str>
+       <str name="spellcheck.collateExtendedResults">true</str>  
+       <str name="spellcheck.maxCollationTries">5</str>
+       <str name="spellcheck.maxCollations">3</str>           
+     </lst>
+
+     <!-- append spellchecking to our list of components -->
+     <arr name="last-components">
+       <str>spellcheck</str>
+     </arr>
+  </requestHandler>
+
+
+  <!-- Update Request Handler.  
+       
+       http://wiki.apache.org/solr/UpdateXmlMessages
+
+       The canonical Request Handler for Modifying the Index through
+       commands specified using XML, JSON, CSV, or JAVABIN
+
+       Note: Since solr1.1 requestHandlers require a valid content
+       type header if posted in the body. For example, curl now
+       requires: -H 'Content-type:text/xml; charset=utf-8'
+       
+       To override the request content type and force a specific 
+       Content-type, use the request parameter: 
+         ?update.contentType=text/csv
+       
+       This handler will pick a response format to match the input
+       if the 'wt' parameter is not explicit
+    -->
+  <requestHandler name="/update" class="solr.UpdateRequestHandler">
+    <!-- See below for information on defining 
+         updateRequestProcessorChains that can be used by name 
+         on each Update Request
+      -->
+    <!--
+       <lst name="defaults">
+         <str name="update.chain">dedupe</str>
+       </lst>
+       -->
+  </requestHandler>
+
+  <!-- for back compat with clients using /update/json and /update/csv -->  
+  <requestHandler name="/update/json" class="solr.JsonUpdateRequestHandler">
+        <lst name="defaults">
+         <str name="stream.contentType">application/json</str>
+       </lst>
+  </requestHandler>
+  <requestHandler name="/update/csv" class="solr.CSVRequestHandler">
+        <lst name="defaults">
+         <str name="stream.contentType">application/csv</str>
+       </lst>
+  </requestHandler>
+
+  <!-- Solr Cell Update Request Handler
+
+       http://wiki.apache.org/solr/ExtractingRequestHandler 
+
+    -->
+  <requestHandler name="/update/extract" 
+                  startup="lazy"
+                  class="solr.extraction.ExtractingRequestHandler" >
+    <lst name="defaults">
+      <str name="lowernames">true</str>
+      <str name="uprefix">ignored_</str>
+
+      <!-- capture link hrefs but ignore div attributes -->
+      <str name="captureAttr">true</str>
+      <str name="fmap.a">links</str>
+      <str name="fmap.div">ignored_</str>
+    </lst>
+  </requestHandler>
+
+
+  <!-- Field Analysis Request Handler
+
+       RequestHandler that provides much the same functionality as
+       analysis.jsp. Provides the ability to specify multiple field
+       types and field names in the same request and outputs
+       index-time and query-time analysis for each of them.
+
+       Request parameters are:
+       analysis.fieldname - field name whose analyzers are to be used
+
+       analysis.fieldtype - field type whose analyzers are to be used
+       analysis.fieldvalue - text for index-time analysis
+       q (or analysis.query) - text for query time analysis
+       analysis.showmatch (true|false) - When set to true and when
+           query analysis is performed, the produced tokens of the
+           field value analysis will be marked as "matched" for every
+           token that is produced by the query analysis
+   -->
+  <requestHandler name="/analysis/field" 
+                  startup="lazy"
+                  class="solr.FieldAnalysisRequestHandler" />
+
+
+  <!-- Document Analysis Handler
+
+       http://wiki.apache.org/solr/AnalysisRequestHandler
+
+       An analysis handler that provides a breakdown of the analysis
+       process of provided documents. This handler expects a (single)
+       content stream with the following format:
+
+       <docs>
+         <doc>
+           <field name="id">1</field>
+           <field name="name">The Name</field>
+           <field name="text">The Text Value</field>
+         </doc>
+         <doc>...</doc>
+         <doc>...</doc>
+         ...
+       </docs>
+
+    Note: Each document must contain a field which serves as the
+    unique key. This key is used in the returned response to associate
+    an analysis breakdown to the analyzed document.
+
+    Like the FieldAnalysisRequestHandler, this handler also supports
+    query analysis by sending either an "analysis.query" or "q"
+    request parameter that holds the query text to be analyzed. It
+    also supports the "analysis.showmatch" parameter which, when set to
+    true, marks all field tokens that match the query tokens
+    as a "match".
+  -->
+  <requestHandler name="/analysis/document" 
+                  class="solr.DocumentAnalysisRequestHandler" 
+                  startup="lazy" />
+
+  <!-- Admin Handlers
+
+       Admin Handlers - This will register all the standard admin
+       RequestHandlers.  
+    -->
+  <requestHandler name="/admin/" 
+                  class="solr.admin.AdminHandlers" />
+  <!-- This single handler is equivalent to the following... -->
+  <!--
+     <requestHandler name="/admin/luke"       class="solr.admin.LukeRequestHandler" />
+     <requestHandler name="/admin/system"     class="solr.admin.SystemInfoHandler" />
+     <requestHandler name="/admin/plugins"    class="solr.admin.PluginInfoHandler" />
+     <requestHandler name="/admin/threads"    class="solr.admin.ThreadDumpHandler" />
+     <requestHandler name="/admin/properties" class="solr.admin.PropertiesRequestHandler" />
+     <requestHandler name="/admin/file"       class="solr.admin.ShowFileRequestHandler" />
+    -->
+  <!-- If you wish to hide files under ${solr.home}/conf, explicitly
+       register the ShowFileRequestHandler using: 
+    -->
+  <!--
+     <requestHandler name="/admin/file" 
+                     class="solr.admin.ShowFileRequestHandler" >
+       <lst name="invariants">
+         <str name="hidden">synonyms.txt</str> 
+         <str name="hidden">anotherfile.txt</str> 
+       </lst>
+     </requestHandler>
+    -->
+
+  <!-- ping/healthcheck -->
+  <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
+    <lst name="invariants">
+      <str name="q">solrpingquery</str>
+    </lst>
+    <lst name="defaults">
+      <str name="echoParams">all</str>
+    </lst>
+    <!-- An optional feature of the PingRequestHandler is to configure the 
+         handler with a "healthcheckFile" which can be used to enable/disable 
+         the PingRequestHandler.
+         relative paths are resolved against the data dir 
+      -->
+    <!-- <str name="healthcheckFile">server-enabled.txt</str> -->
+  </requestHandler>
+
+  <!-- Echo the request contents back to the client -->
+  <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
+    <lst name="defaults">
+     <str name="echoParams">explicit</str> 
+     <str name="echoHandler">true</str>
+    </lst>
+  </requestHandler>
+  
+  <!-- Solr Replication
+
+       The ReplicationHandler supports replicating indexes from a
+       "master" used for indexing and "slaves" used for queries.
+
+       http://wiki.apache.org/solr/SolrReplication 
+
+       It is also necessary for SolrCloud to function (in Cloud mode, the
+       replication handler is used to bulk transfer segments when nodes 
+       are added or need to recover).
+
+       https://wiki.apache.org/solr/SolrCloud/
+    -->
+  <requestHandler name="/replication" class="solr.ReplicationHandler" > 
+    <!--
+       To enable simple master/slave replication, uncomment one of the 
+       sections below, depending on whether this solr instance should be
+       the "master" or a "slave".  If this instance is a "slave" you will 
+       also need to fill in the masterUrl to point to a real machine.
+    -->
+    <!--
+       <lst name="master">
+         <str name="replicateAfter">commit</str>
+         <str name="replicateAfter">startup</str>
+         <str name="confFiles">schema.xml,stopwords.txt</str>
+       </lst>
+    -->
+    <!--
+       <lst name="slave">
+         <str name="masterUrl">http://your-master-hostname:8983/solr</str>
+         <str name="pollInterval">00:00:60</str>
+       </lst>
+    -->
+  </requestHandler>
+
+  <!-- Search Components
+
+       Search components are registered to SolrCore and used by 
+       instances of SearchHandler (which can access them by name)
+       
+       By default, the following components are available:
+       
+       <searchComponent name="query"     class="solr.QueryComponent" />
+       <searchComponent name="facet"     class="solr.FacetComponent" />
+       <searchComponent name="mlt"       class="solr.MoreLikeThisComponent" />
+       <searchComponent name="highlight" class="solr.HighlightComponent" />
+       <searchComponent name="stats"     class="solr.StatsComponent" />
+       <searchComponent name="debug"     class="solr.DebugComponent" />
+   
+       Default configuration in a requestHandler would look like:
+
+       <arr name="components">
+         <str>query</str>
+         <str>facet</str>
+         <str>mlt</str>
+         <str>highlight</str>
+         <str>stats</str>
+         <str>debug</str>
+       </arr>
+
+       If you register a searchComponent to one of the standard names, 
+       that will be used instead of the default.
+
+       To insert components before or after the 'standard' components, use:
+    
+       <arr name="first-components">
+         <str>myFirstComponentName</str>
+       </arr>
+    
+       <arr name="last-components">
+         <str>myLastComponentName</str>
+       </arr>
+
+       NOTE: The component registered with the name "debug" will
+       always be executed after the "last-components" 
+       
+     -->
+  
+   <!-- Spell Check
+
+        The spell check component can return a list of alternative spelling
+        suggestions.  
+
+        http://wiki.apache.org/solr/SpellCheckComponent
+     -->
+  <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
+
+    <str name="queryAnalyzerFieldType">text_general</str>
+
+    <!-- Multiple "Spell Checkers" can be declared and used by this
+         component
+      -->
+
+    <!-- a spellchecker built from a field of the main index -->
+    <lst name="spellchecker">
+      <str name="name">default</str>
+      <str name="field">text</str>
+      <str name="classname">solr.DirectSolrSpellChecker</str>
+      <!-- the spellcheck distance measure used, the default is the internal levenshtein -->
+      <str name="distanceMeasure">internal</str>
+      <!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
+      <float name="accuracy">0.5</float>
+      <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
+      <int name="maxEdits">2</int>
+      <!-- the minimum shared prefix when enumerating terms -->
+      <int name="minPrefix">1</int>
+      <!-- maximum number of inspections per result. -->
+      <int name="maxInspections">5</int>
+      <!-- minimum length of a query term to be considered for correction -->
+      <int name="minQueryLength">4</int>
+      <!-- maximum threshold of documents a query term can appear in to be considered for correction -->
+      <float name="maxQueryFrequency">0.01</float>
+      <!-- uncomment this to require suggestions to occur in 1% of the documents
+       <float name="thresholdTokenFrequency">.01</float>
+      -->
+    </lst>
+    
+    <!-- a spellchecker that can break or combine words.  See "/spell" handler below for usage -->
+    <lst name="spellchecker">
+      <str name="name">wordbreak</str>
+      <str name="classname">solr.WordBreakSolrSpellChecker</str>      
+      <str name="field">name</str>
+      <str name="combineWords">true</str>
+      <str name="breakWords">true</str>
+      <int name="maxChanges">10</int>
+    </lst>
+
+    <!-- a spellchecker that uses a different distance measure -->
+    <!--
+       <lst name="spellchecker">
+         <str name="name">jarowinkler</str>
+         <str name="field">spell</str>
+         <str name="classname">solr.DirectSolrSpellChecker</str>
+         <str name="distanceMeasure">
+           org.apache.lucene.search.spell.JaroWinklerDistance
+         </str>
+       </lst>
+     -->
+
+    <!-- a spellchecker that uses an alternate comparator 
+
+         comparatorClass can be one of:
+          1. score (default)
+          2. freq (Frequency first, then score)
+          3. A fully qualified class name
+      -->
+    <!--
+       <lst name="spellchecker">
+         <str name="name">freq</str>
+         <str name="field">lowerfilt</str>
+         <str name="classname">solr.DirectSolrSpellChecker</str>
+         <str name="comparatorClass">freq</str>
+       </lst> -->
+
+    <!-- A spellchecker that reads the list of words from a file -->
+    <!--
+       <lst name="spellchecker">
+         <str name="classname">solr.FileBasedSpellChecker</str>
+         <str name="name">file</str>
+         <str name="sourceLocation">spellings.txt</str>
+         <str name="characterEncoding">UTF-8</str>
+         <str name="spellcheckIndexDir">spellcheckerFile</str>
+       </lst>
+      -->
+  </searchComponent>
+
+  <!-- A request handler for demonstrating the spellcheck component.  
+
+       NOTE: This is purely as an example.  The whole purpose of the
+       SpellCheckComponent is to hook it into the request handler that
+       handles your normal user queries so that a separate request is
+       not needed to get suggestions.
+
+       IN OTHER WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS
+       NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM!
+       
+       See http://wiki.apache.org/solr/SpellCheckComponent for details
+       on the request parameters.
+    -->
+  <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
+    <lst name="defaults">
+      <str name="df">text</str>
+      <!-- Solr will use suggestions from both the 'default' spellchecker
+           and from the 'wordbreak' spellchecker and combine them.
+           collations (re-written queries) can include a combination of
+           corrections from both spellcheckers -->
+      <str name="spellcheck.dictionary">default</str>
+      <str name="spellcheck.dictionary">wordbreak</str>
+      <str name="spellcheck">on</str>
+      <str name="spellcheck.extendedResults">true</str>       
+      <str name="spellcheck.count">10</str>
+      <str name="spellcheck.alternativeTermCount">5</str>
+      <str name="spellcheck.maxResultsForSuggest">5</str>       
+      <str name="spellcheck.collate">true</str>
+      <str name="spellcheck.collateExtendedResults">true</str>  
+      <str name="spellcheck.maxCollationTries">10</str>
+      <str name="spellcheck.maxCollations">5</str>         
+    </lst>
+    <arr name="last-components">
+      <str>spellcheck</str>
+    </arr>
+  </requestHandler>
+
+  <!-- Term Vector Component
+
+       http://wiki.apache.org/solr/TermVectorComponent
+    -->
+  <searchComponent name="tvComponent" class="solr.TermVectorComponent"/>
+
+  <!-- A request handler for demonstrating the term vector component
+
+       This is purely as an example.
+
+       In reality you will likely want to add the component to your 
+       already specified request handlers. 
+    -->
+  <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy">
+    <lst name="defaults">
+      <str name="df">text</str>
+      <bool name="tv">true</bool>
+    </lst>
+    <arr name="last-components">
+      <str>tvComponent</str>
+    </arr>
+  </requestHandler>
+
+  <!-- Clustering Component
+
+       http://wiki.apache.org/solr/ClusteringComponent
+
+       You'll need to set the solr.clustering.enabled system property
+       when running solr to run with clustering enabled:
+
+            java -Dsolr.clustering.enabled=true -jar start.jar
+
+    -->
+  <searchComponent name="clustering"
+                   enable="${solr.clustering.enabled:false}"
+                   class="solr.clustering.ClusteringComponent" >
+    <!-- Declare an engine -->
+    <lst name="engine">
+      <!-- The name, only one can be named "default" -->
+      <str name="name">default</str>
+
+      <!-- Class name of Carrot2 clustering algorithm.
+
+           Currently available algorithms are:
+           
+           * org.carrot2.clustering.lingo.LingoClusteringAlgorithm
+           * org.carrot2.clustering.stc.STCClusteringAlgorithm
+           * org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm
+           
+           See http://project.carrot2.org/algorithms.html for the
+           algorithm's characteristics.
+        -->
+      <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
+
+      <!-- Overriding values for Carrot2 default algorithm attributes.
+
+           For a description of all available attributes, see:
+           http://download.carrot2.org/stable/manual/#chapter.components.
+           Use attribute key as name attribute of str elements
+           below. These can be further overridden for individual
+           requests by specifying attribute key as request parameter
+           name and attribute value as parameter value.
+        -->
+      <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
+
+      <!-- Location of Carrot2 lexical resources.
+
+           A directory from which to load Carrot2-specific stop words
+           and stop labels. Absolute or relative to Solr config directory.
+           If a specific resource (e.g. stopwords.en) is present in the
+           specified dir, it will completely override the corresponding
+           default one that ships with Carrot2.
+
+           For an overview of Carrot2 lexical resources, see:
+           http://download.carrot2.org/head/manual/#chapter.lexical-resources
+        -->
+      <str name="carrot.lexicalResourcesDir">clustering/carrot2</str>
+
+      <!-- The language to assume for the documents.
+
+           For a list of allowed values, see:
+           http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
+       -->
+      <str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
+    </lst>
+    <lst name="engine">
+      <str name="name">stc</str>
+      <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
+    </lst>
+  </searchComponent>
+
+  <!-- A request handler for demonstrating the clustering component
+
+       This is purely as an example.
+
+       In reality you will likely want to add the component to your 
+       already specified request handlers. 
+    -->
+  <requestHandler name="/clustering"
+                  startup="lazy"
+                  enable="${solr.clustering.enabled:false}"
+                  class="solr.SearchHandler">
+    <lst name="defaults">
+      <bool name="clustering">true</bool>
+      <str name="clustering.engine">default</str>
+      <bool name="clustering.results">true</bool>
+      <!-- The title field -->
+      <str name="carrot.title">name</str>
+      <str name="carrot.url">id</str>
+      <!-- The field to cluster on -->
+       <str name="carrot.snippet">features</str>
+       <!-- produce summaries -->
+       <bool name="carrot.produceSummary">true</bool>
+       <!-- the maximum number of labels per cluster -->
+       <!--<int name="carrot.numDescriptions">5</int>-->
+       <!-- produce sub clusters -->
+       <bool name="carrot.outputSubClusters">false</bool>
+       
+       <str name="defType">edismax</str>
+       <str name="qf">
+         text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+       </str>
+       <str name="q.alt">*:*</str>
+       <str name="rows">10</str>
+       <str name="fl">*,score</str>
+    </lst>     
+    <arr name="last-components">
+      <str>clustering</str>
+    </arr>
+  </requestHandler>
+  
+  <!-- Terms Component
+
+       http://wiki.apache.org/solr/TermsComponent
+
+       A component to return terms and document frequency of those
+       terms
+    -->
+  <searchComponent name="terms" class="solr.TermsComponent"/>
+
+  <!-- A request handler for demonstrating the terms component -->
+  <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
+     <lst name="defaults">
+      <bool name="terms">true</bool>
+      <bool name="distrib">false</bool>
+    </lst>     
+    <arr name="components">
+      <str>terms</str>
+    </arr>
+  </requestHandler>
+
+
+  <!-- Query Elevation Component
+
+       http://wiki.apache.org/solr/QueryElevationComponent
+
+       a search component that enables you to configure the top
+       results for a given query regardless of the normal lucene
+       scoring.
+    -->
+  <searchComponent name="elevator" class="solr.QueryElevationComponent" >
+    <!-- pick a fieldType to analyze queries -->
+    <str name="queryFieldType">string</str>
+    <str name="config-file">elevate.xml</str>
+  </searchComponent>
+
+  <!-- A request handler for demonstrating the elevator component -->
+  <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
+    <lst name="defaults">
+      <str name="echoParams">explicit</str>
+      <str name="df">text</str>
+    </lst>
+    <arr name="last-components">
+      <str>elevator</str>
+    </arr>
+  </requestHandler>
+
+  <!-- Highlighting Component
+
+       http://wiki.apache.org/solr/HighlightingParameters
+    -->
+  <searchComponent class="solr.HighlightComponent" name="highlight">
+    <highlighting>
+      <!-- Configure the standard fragmenter -->
+      <!-- This could most likely be commented out in the "default" case -->
+      <fragmenter name="gap" 
+                  default="true"
+                  class="solr.highlight.GapFragmenter">
+        <lst name="defaults">
+          <int name="hl.fragsize">100</int>
+        </lst>
+      </fragmenter>
+
+      <!-- A regular-expression-based fragmenter 
+           (for sentence extraction) 
+        -->
+      <fragmenter name="regex" 
+                  class="solr.highlight.RegexFragmenter">
+        <lst name="defaults">
+          <!-- slightly smaller fragsizes work better because of slop -->
+          <int name="hl.fragsize">70</int>
+          <!-- allow 50% slop on fragment sizes -->
+          <float name="hl.regex.slop">0.5</float>
+          <!-- a basic sentence pattern -->
+          <str name="hl.regex.pattern">[-\w ,/\n\&quot;&apos;]{20,200}</str>
+        </lst>
+      </fragmenter>
+
+      <!-- Configure the standard formatter -->
+      <formatter name="html" 
+                 default="true"
+                 class="solr.highlight.HtmlFormatter">
+        <lst name="defaults">
+          <str name="hl.simple.pre"><![CDATA[<em>]]></str>
+          <str name="hl.simple.post"><![CDATA[</em>]]></str>
+        </lst>
+      </formatter>
+
+      <!-- Configure the standard encoder -->
+      <encoder name="html" 
+               class="solr.highlight.HtmlEncoder" />
+
+      <!-- Configure the standard fragListBuilder -->
+      <fragListBuilder name="simple" 
+                       class="solr.highlight.SimpleFragListBuilder"/>
+      
+      <!-- Configure the single fragListBuilder -->
+      <fragListBuilder name="single" 
+                       class="solr.highlight.SingleFragListBuilder"/>
+      
+      <!-- Configure the weighted fragListBuilder -->
+      <fragListBuilder name="weighted" 
+                       default="true"
+                       class="solr.highlight.WeightedFragListBuilder"/>
+      
+      <!-- default tag FragmentsBuilder -->
+      <fragmentsBuilder name="default" 
+                        default="true"
+                        class="solr.highlight.ScoreOrderFragmentsBuilder">
+        <!-- 
+        <lst name="defaults">
+          <str name="hl.multiValuedSeparatorChar">/</str>
+        </lst>
+        -->
+      </fragmentsBuilder>
+
+      <!-- multi-colored tag FragmentsBuilder: hl.tag.pre is a comma-separated list of opening tags, one background color each -->
+      <fragmentsBuilder name="colored" 
+                        class="solr.highlight.ScoreOrderFragmentsBuilder">
+        <lst name="defaults">
+          <str name="hl.tag.pre"><![CDATA[
+               <b style="background:yellow">,<b style="background:lawngreen">,
+               <b style="background:aquamarine">,<b style="background:magenta">,
+               <b style="background:palegreen">,<b style="background:coral">,
+               <b style="background:wheat">,<b style="background:khaki">,
+               <b style="background:lime">,<b style="background:deepskyblue">]]></str>
+          <str name="hl.tag.post"><![CDATA[</b>]]></str>
+        </lst>
+      </fragmentsBuilder>
+      
+      <boundaryScanner name="default" 
+                       default="true"
+                       class="solr.highlight.SimpleBoundaryScanner">
+        <lst name="defaults">
+          <str name="hl.bs.maxScan">10</str>
+          <str name="hl.bs.chars">.,!? &#9;&#10;&#13;</str>
+        </lst>
+      </boundaryScanner>
+      
+      <boundaryScanner name="breakIterator" 
+                       class="solr.highlight.BreakIteratorBoundaryScanner">
+        <lst name="defaults">
+          <!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE -->
+          <str name="hl.bs.type">WORD</str>
+          <!-- language and country are used when constructing Locale object.  -->
+          <!-- And the Locale object will be used when getting instance of BreakIterator -->
+          <str name="hl.bs.language">en</str>
+          <str name="hl.bs.country">US</str>
+        </lst>
+      </boundaryScanner>
+    </highlighting>
+  </searchComponent>
+
+  <!-- Update Processors
+
+       Chains of Update Processor Factories for dealing with Update
+       Requests can be declared, and then used by name in Update
+       Request Processors
+
+       http://wiki.apache.org/solr/UpdateRequestProcessor
+
+    --> 
+  <!-- Deduplication
+
+       An example dedup update processor that creates the "id" field
+       on the fly based on the hash code of some other fields.  This
+       example has overwriteDupes set to false since we are using the
+       id field as the signatureField and Solr will maintain
+       uniqueness based on that anyway.  
+       
+    -->
+  <!--
+     <updateRequestProcessorChain name="dedupe">
+       <processor class="solr.processor.SignatureUpdateProcessorFactory">
+         <bool name="enabled">true</bool>
+         <str name="signatureField">id</str>
+         <bool name="overwriteDupes">false</bool>
+         <str name="fields">name,features,cat</str>
+         <str name="signatureClass">solr.processor.Lookup3Signature</str>
+       </processor>
+       <processor class="solr.LogUpdateProcessorFactory" />
+       <processor class="solr.RunUpdateProcessorFactory" />
+     </updateRequestProcessorChain>
+    -->
+  
+  <!-- Language identification
+
+       This example update chain identifies the language of the incoming
+       documents using the langid contrib. The detected language is
+       written to field language_s. No field name mapping is done.
+       The fields used for detection are text, title, subject and description,
+       making this example suitable for detecting languages from full-text
+       rich documents injected via ExtractingRequestHandler.
+       See more about langId at http://wiki.apache.org/solr/LanguageDetection
+    -->
+    <!--
+     <updateRequestProcessorChain name="langid">
+       <processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory">
+         <str name="langid.fl">text,title,subject,description</str>
+         <str name="langid.langField">language_s</str>
+         <str name="langid.fallback">en</str>
+       </processor>
+       <processor class="solr.LogUpdateProcessorFactory" />
+       <processor class="solr.RunUpdateProcessorFactory" />
+     </updateRequestProcessorChain>
+    -->
+
+  <!-- Script update processor
+
+    This example hooks in an update processor implemented using JavaScript.
+
+    See more about the script update processor at http://wiki.apache.org/solr/ScriptUpdateProcessor
+  -->
+  <!--
+    <updateRequestProcessorChain name="script">
+      <processor class="solr.StatelessScriptUpdateProcessorFactory">
+        <str name="script">update-script.js</str>
+        <lst name="params">
+          <str name="config_param">example config parameter</str>
+        </lst>
+      </processor>
+      <processor class="solr.RunUpdateProcessorFactory" />
+    </updateRequestProcessorChain>
+  -->
+ 
+  <!-- Response Writers
+
+       http://wiki.apache.org/solr/QueryResponseWriter
+
+       Request responses will be written using the writer specified by
+       the 'wt' request parameter matching the name of a registered
+       writer.
+
+       The "default" writer is the default and will be used if 'wt' is
+       not specified in the request.
+    -->
+  <!-- The following response writers are implicitly configured unless
+       overridden...
+    -->
+  <!--
+     <queryResponseWriter name="xml" 
+                          default="true"
+                          class="solr.XMLResponseWriter" />
+     <queryResponseWriter name="json" class="solr.JSONResponseWriter"/>
+     <queryResponseWriter name="python" class="solr.PythonResponseWriter"/>
+     <queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/>
+     <queryResponseWriter name="php" class="solr.PHPResponseWriter"/>
+     <queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/>
+     <queryResponseWriter name="csv" class="solr.CSVResponseWriter"/>
+     <queryResponseWriter name="schema.xml" class="solr.SchemaXmlResponseWriter"/>
+    -->
+
+  <queryResponseWriter name="json" class="solr.JSONResponseWriter">
+     <!-- For the purposes of the tutorial, JSON responses are written as
+      plain text so that they are easy to read in *any* browser.
+      If you expect a MIME type of "application/json" just remove this override.
+     -->
+    <str name="content-type">text/plain; charset=UTF-8</str>
+  </queryResponseWriter>
+  
+  <!--
+     Custom response writers can be declared as needed...
+    -->
+    <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"/>
+  
+
+  <!-- XSLT response writer transforms the XML output by any xslt file found
+       in Solr's conf/xslt directory.  Changes to xslt files are checked for
+       every xsltCacheLifetimeSeconds.  
+    -->
+  <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
+    <int name="xsltCacheLifetimeSeconds">5</int>
+  </queryResponseWriter>
+
+  <!-- Query Parsers
+
+       http://wiki.apache.org/solr/SolrQuerySyntax
+
+       Multiple QParserPlugins can be registered by name, and then
+       used in either the "defType" param for the QueryComponent (used
+       by SearchHandler) or in LocalParams
+    -->
+  <!-- example of registering a query parser -->
+  <!--
+     <queryParser name="myparser" class="com.mycompany.MyQParserPlugin"/>
+    -->
+
+  <!-- Function Parsers
+
+       http://wiki.apache.org/solr/FunctionQuery
+
+       Multiple ValueSourceParsers can be registered by name, and then
+       used as function names when using the "func" QParser.
+    -->
+  <!-- example of registering a custom function parser  -->
+  <!--
+     <valueSourceParser name="myfunc" 
+                        class="com.mycompany.MyValueSourceParser" />
+    -->
+    
+  
+  <!-- Document Transformers
+       http://wiki.apache.org/solr/DocTransformers
+    -->
+  <!--
+     Could be something like:
+     <transformer name="db" class="com.mycompany.LoadFromDatabaseTransformer" >
+       <str name="connection">jdbc://....</str>
+     </transformer>
+     
+     To add a constant value to all docs, use:
+     <transformer name="mytrans2" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
+       <int name="value">5</int>
+     </transformer>
+     
+     If you want the user to still be able to change it with _value:something_ use this:
+     <transformer name="mytrans3" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
+       <double name="defaultValue">5</double>
+     </transformer>
+
+      If you are using the QueryElevationComponent, you may wish to mark documents that get boosted.  The
+      EditorialMarkerFactory will do exactly that:
+     <transformer name="qecBooster" class="org.apache.solr.response.transform.EditorialMarkerFactory" />
+    -->
+    
+
+  <!-- Legacy config for the admin interface -->
+  <admin>
+    <defaultQuery>*:*</defaultQuery>
+  </admin>
+
+</config>
diff --git a/zookeeper/solr/collection1/conf/spellings.txt b/zookeeper/solr/collection1/conf/spellings.txt

new file mode 100644 (file)

index 0000000..d7ede6f
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/spellings.txt
@@ -0,0 +1,2 @@
+pizza
+history
\ No newline at end of file
diff --git a/zookeeper/solr/collection1/conf/stopwords.txt b/zookeeper/solr/collection1/conf/stopwords.txt

new file mode 100644 (file)

index 0000000..ae1e83e
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/stopwords.txt
@@ -0,0 +1,14 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/zookeeper/solr/collection1/conf/synonyms.txt b/zookeeper/solr/collection1/conf/synonyms.txt

new file mode 100644 (file)

index 0000000..7f72128
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/synonyms.txt
@@ -0,0 +1,29 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+#some test synonym mappings unlikely to appear in real input text
+aaafoo => aaabar
+bbbfoo => bbbfoo bbbbar
+cccfoo => cccbar cccbaz
+fooaaa,baraaa,bazaaa
+
+# Some synonym groups specific to this example
+GB,gib,gigabyte,gigabytes
+MB,mib,megabyte,megabytes
+Television, Televisions, TV, TVs
+#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
+#after us won't split it into two words.
+
+# Synonym mappings can be used for spelling correction too
+pixima => pixma
+
diff --git a/zookeeper/solr/collection1/conf/update-script.js b/zookeeper/solr/collection1/conf/update-script.js

new file mode 100644 (file)

index 0000000..49b07f9
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/update-script.js
@@ -0,0 +1,53 @@
+/*
+  This is a basic skeleton JavaScript update processor.
+
+  In order for this to be executed, it must be properly wired into solrconfig.xml; by default it is commented out in
+  the example solrconfig.xml and must be uncommented to be enabled.
+
+  See http://wiki.apache.org/solr/ScriptUpdateProcessor for more details.
+*/
+
+function processAdd(cmd) {
+  // Logs the id of the document carried by this add command.
+  var doc = cmd.solrDoc;  // org.apache.solr.common.SolrInputDocument
+  var id = doc.getFieldValue("id");
+  logger.info("update-script#processAdd: id=" + id);
+
+// Set a field value:
+//  doc.setField("foo_s", "whatever");
+
+// Get a configuration parameter:
+//  var config_param = params.get('config_param');  // "params" only exists if processor configured with <lst name="params">
+
+// Get a request parameter:
+//  var some_param = req.getParams().get("some_param");
+
+// Add a field of field names that match a pattern:
+//   - Potentially useful to determine the fields/attributes represented in a result set, via faceting on attribute_ss
+//  var field_names = doc.getFieldNames().toArray();
+//  for (var i = 0; i < field_names.length; i++) {
+//    var field_name = field_names[i];
+//    if (/attr_.*/.test(field_name)) { doc.addField("attribute_ss", field_names[i]); }
+//  }
+
+}
+
+function processDelete(cmd) {
+  // no-op
+}
+
+function processMergeIndexes(cmd) {
+  // no-op
+}
+
+function processCommit(cmd) {
+  // no-op
+}
+
+function processRollback(cmd) {
+  // no-op
+}
+
+function finish() {
+  // no-op
+}
diff --git a/zookeeper/solr/collection1/conf/velocity/README.txt b/zookeeper/solr/collection1/conf/velocity/README.txt

new file mode 100644 (file)

index 0000000..5d560ba
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/README.txt
@@ -0,0 +1,101 @@
+Introduction
+------------
+Solr Search Velocity Templates
+
+A quick demo of Solr using the VelocityResponseWriter: http://wiki.apache.org/solr/VelocityResponseWriter
+
+You typically access these templates via:
+       http://localhost:8983/solr/collection1/browse
+
+It's called "browse" because you can click around with your mouse
+without needing to type any search terms.  And of course it
+also works as a standard search app.
+
+Known Limitations
+-----------------
+* The /browse and the VelocityResponseWriter component
+  serve content directly from Solr, which usually requires
+  Solr's HTTP API to be exposed.  Advanced users could
+  potentially access other parts of Solr directly.
+* There are some hard coded fields in these templates.
+  Since these templates live under conf, they should be
+  considered part of the overall configuration, and
+  must be coordinated with schema.xml and solrconfig.xml
+
+Velocity Info
+-------------
+Java-based template language.
+
+It's nice in this context because changes to the templates
+are immediately visible in the browser on the next visit.
+
+Links:
+       http://velocity.apache.org
+       http://wiki.apache.org/velocity/
+       http://velocity.apache.org/engine/releases/velocity-1.7/user-guide.html
+
+
+File List
+---------
+
+System and Misc:
+  VM_global_library.vm    - Macros used by other templates,
+                            exact filename is important for Velocity to see it
+  error.vm                - shows errors, if any
+  debug.vm                - includes toggle links for "explain" and "all fields"
+                            activated by debug link in footer.vm
+  README.txt              - this file
+
+Overall Page Composition:
+  browse.vm               - Main entry point into templates
+  layout.vm               - overall HTML page layout
+  head.vm                 - elements in the <head> section of the HTML document
+  header.vm               - top section of page visible to users
+  footer.vm               - bottom section of page visible to users,
+                            includes debug and help links
+  main.css                - CSS style for overall pages
+                            see also jquery.autocomplete.css
+
+Query Form and Options:
+  query_form.vm           - renders query form
+  query_group.vm          - group by fields
+                            e.g.: Manufacturer or Popularity
+  query_spatial.vm        - select box for location based Geospatial search
+
+Spelling Suggestions:
+  did_you_mean.vm         - hyperlinked spelling suggestions in results
+  suggest.vm              - dynamic spelling suggestions
+                            as you type in the search form
+  jquery.autocomplete.js  - supporting files for dynamic suggestions
+  jquery.autocomplete.css - Most CSS is defined in main.css
+
+
+Search Results, General:
+  (see also browse.vm)
+  tabs.vm                 - provides navigation to advanced search options
+  pagination_top.vm       - paging and statistics at top of results
+  pagination_bottom.vm    - paging and statistics at bottom of results
+  results_list.vm
+  hit.vm                  - called for each matching doc,
+                            decides which template to use
+  hit_grouped.vm          - display results grouped by field values
+  product_doc.vm          - display a Product
+  join_doc.vm             - display a joined document
+  richtext_doc.vm         - display a complex/misc. document
+  hit_plain.vm            - basic display of all fields,
+                            edit results_list.vm to enable this
+
+
+Search Results, Facets & Clusters:
+  facets.vm               - calls the 4 facet templates and 1 cluster template
+  facet_fields.vm         - display facets based on field values
+                            e.g.: fields specified by &facet.field=
+  facet_queries.vm        - display facets based on specific facet queries
+                            e.g.: facets specified by &facet.query=
+  facet_ranges.vm         - display facets based on ranges
+                            e.g.: ranges specified by &facet.range=
+  facet_pivot.vm          - display pivot based facets
+                            e.g.: facets specified by &facet.pivot=
+  cluster.vm              - if clustering is available
+                            then call cluster_results.vm
+  cluster_results.vm      - actual rendering of clusters
diff --git a/zookeeper/solr/collection1/conf/velocity/VM_global_library.vm b/zookeeper/solr/collection1/conf/velocity/VM_global_library.vm

new file mode 100644 (file)

index 0000000..5dda07c
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/VM_global_library.vm
@@ -0,0 +1,175 @@
+#**
+ *  Global macros used by other templates.
+ *  This file must be named VM_global_library.vm
+ *  in order for Velocity to find it.
+ *#
+
+#macro(param $key)$request.params.get($key)#end
+
+#macro(url_root)/solr#end
+
+## TODO: s/url_for_solr/url_for_core/ and s/url_root/url_for_solr/
+#macro(core_name)$request.core.name#end
+#macro(url_for_solr)#{url_root}#if($request.core.name != "")/$request.core.name#end#end
+#macro(url_for_home)#url_for_solr/browse#end
+
+#macro(q)&q=$!{esc.url($params.get('q'))}#end
+
+#macro(fqs $p)#foreach($fq in $p)#if($velocityCount>1)&#{end}fq=$esc.url($fq)#end#end
+
+#macro(debug)#if($request.params.get('debugQuery'))&debugQuery=true#end#end
+
+#macro(boostPrice)#if($request.params.get('bf') == 'price')&bf=price#end#end        
+
+#macro(annotate)#if($request.params.get('annotateBrowse'))&annotateBrowse=true#end#end
+
+#macro(annTitle $msg)#if($annotate == true)title="$msg"#end#end
+## Re-emits the spatial params (sfield, pt, d); pt and d are URL-escaped because they are echoed straight from the request
+#macro(spatial)#if($request.params.get('sfield'))&sfield=store#end#if($request.params.get('pt'))&pt=$esc.url($request.params.get('pt'))#end#if($request.params.get('d'))&d=$esc.url($request.params.get('d'))#end#end
+## Re-emits the queryOpts param when it is set and non-empty; URL-escaped because the value is echoed from the request
+#macro(qOpts)#set($queryOpts = $request.params.get("queryOpts"))#if($queryOpts && $queryOpts != "")&queryOpts=$esc.url($queryOpts)#end#end
+## Re-emits group=true and every group.field value; field values are URL-escaped because they are echoed from the request
+#macro(group)#if($request.params.getBool("group") == true)&group=true#end#if($request.params.get("group.field"))#foreach($grp in $request.params.getParams('group.field'))&group.field=$esc.url($grp)#end#end#end
+
+#macro(sort $p)#if($p)#foreach($s in $p)&sort=$esc.url($s)#end#end#end
+
+#macro(lensNoQ)?#if($request.params.getParams('fq') and $list.size($request.params.getParams('fq')) > 0)&#fqs($request.params.getParams('fq'))#end#sort($request.params.getParams('sort'))#debug#boostPrice#annotate#spatial#qOpts#group#end
+#macro(lens)#lensNoQ#q#end
+        
+
+#macro(url_for_lens)#{url_for_home}#lens#end
+
+#macro(url_for_start $start)#url_for_home#lens&start=$start#end
+
+#macro(url_for_filters $p)#url_for_home?#q#boostPrice#spatial#qOpts#if($list.size($p) > 0)&#fqs($p)#end#debug#end
+
+#macro(url_for_nested_facet_query $field)#url_for_home#lens&fq=$esc.url($field)#end
+
+## TODO: convert to use {!raw f=$field}$value (with escaping of course)
+#macro(url_for_facet_filter $field $value)#url_for_home#lens&fq=$esc.url($field):%22$esc.url($value)%22#end
+
+#macro(url_for_facet_date_filter $field $value)#url_for_home#lens&fq=$esc.url($field):$esc.url($value)#end
+
+#macro(url_for_facet_range_filter $field $value)#url_for_home#lens&fq=$esc.url($field):$esc.url($value)#end
+
+
+#macro(link_to_previous_page $text)
+  #if($page.current_page_number > 1)
+    #set($prev_start = $page.start - $page.results_per_page)
+    <a class="prev-page" href="#url_for_start($prev_start)">$text</a>
+  #end
+#end
+
+#macro(link_to_next_page $text)
+  #if($page.current_page_number < $page.page_count)
+    #set($next_start = $page.start + $page.results_per_page)
+    <a class="next-page" href="#url_for_start($next_start)">$text</a>
+  #end
+#end
+
+#macro(link_to_page $page_number $text)
+  #if($page_number == $page.current_page_number)
+    $text
+  #else
+    #if($page_number <= $page.page_count)
+      #set($page_start = $page_number * $page.results_per_page - $page.results_per_page)
+      <a class="page" href="#url_for_start($page_start)">$text</a>
+    #end
+  #end
+#end
+
+#macro(display_facet_query $field, $display, $fieldName)
+  #if($field.size() > 0)
+  <span class="facet-field">$display</span>
+    <ul>
+    #foreach ($facet in $field)
+      #if ($facet.value > 0)
+        #set($facetURL = "#url_for_nested_facet_query($facet.key)")
+        #if ($facetURL != '')
+          <li><a href="$facetURL">$facet.key</a> ($facet.value)</li>
+        #end
+      #end
+    #end
+    </ul>
+  #end      
+#end
+
+
+#macro(display_facet_range $field, $display, $fieldName, $start, $end, $gap, $before, $after)
+  <span class="facet-field">$display</span>
+    <ul>
+    #if($before && $before != "")
+      #set($value = "[* TO " + "#format_value($start)" + "}")
+      #set($facetURL = "#url_for_facet_range_filter($fieldName, $value)")
+      <li><a href="$facetURL">Less than #format_value($start)</a> ($before)</li>
+    #end
+    #foreach ($facet in $field)
+      #set($rangeEnd = "#range_get_to_value($facet.key, $gap)")
+      #set($value = "[" + $facet.key + " TO " + $rangeEnd + "}")
+      #set($facetURL = "#url_for_facet_range_filter($fieldName, $value)")
+      #if ($facetURL != '')
+        <li><a href="$facetURL">$facet.key - #format_value($rangeEnd)</a> ($facet.value)</li>
+      #end
+    #end
+    #if($end && $end != "" && $after > 0)
+      #set($value = "[" + "#format_value($end)" + " TO *}")
+      #set($facetURL = "#url_for_facet_range_filter($fieldName, $value)")
+      <li><a href="$facetURL">More than #format_value($end)</a> ($after)</li>
+    #end
+    </ul>
+#end
+
+## $pivots is a list of facet_pivot
+#macro(display_facet_pivot $pivots, $display)
+  #if($pivots.size() > 0)
+  <span class="facet-field">$display</span>
+    <ul>
+      #foreach ($pivot in $pivots)
+        #foreach ($entry in $pivot.value)
+          <a href="#url_for_facet_filter($entry.field, $entry.value)">$entry.field::$entry.value</a> ($entry.count)
+          <ul>
+            #foreach($nest in $entry.pivot)
+              <a href="#url_for_facet_filter($entry.field, $entry.value)&fq=$esc.url($nest.field):%22$esc.url($nest.value)%22">$nest.field::$nest.value</a> ($nest.count)
+            #end
+          </ul>
+        #end
+      #end
+    </ul>
+  #end
+#end
+
+#macro(field $f)
+  #if($response.response.highlighting.get($docId).get($f).get(0))
+    #set($pad = "")
+    #foreach($v in $response.response.highlighting.get($docId).get($f))
+$pad$v##
+      #set($pad = " ... ")
+    #end
+  #else
+    #foreach($v in $doc.getFieldValues($f))
+$v##
+    #end
+  #end
+#end  
+
+#macro(utc_date $theDate)
+$date.format("yyyy-MM-dd'T'HH:mm:ss'Z'",$theDate,$date.getLocale(),$date.getTimeZone().getTimeZone("UTC"))##
+#end
+
+#macro(format_value $val)
+#if(${val.class.name} == "java.util.Date")
+#utc_date($val)##
+#else
+$val##
+#end
+#end
+
+#macro(range_get_to_value $inval, $gapval)
+#if(${gapval.class.name} == "java.lang.String")
+$inval$gapval##
+#elseif(${gapval.class.name} == "java.lang.Float" || ${inval.class.name} == "java.lang.Float")
+$math.toDouble($math.add($inval,$gapval))##
+#else
+$math.add($inval,$gapval)##
+#end
+#end
diff --git a/zookeeper/solr/collection1/conf/velocity/browse.vm b/zookeeper/solr/collection1/conf/velocity/browse.vm

new file mode 100644 (file)

index 0000000..10ecaeb
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/browse.vm
@@ -0,0 +1,33 @@
+#**
+ *  Main entry point into the /browse templates
+ *
+ *  Sets the variables shared by the included templates, then includes the
+ *  query form, spelling suggestion, facets, pagination, error and result
+ *  templates in page order.
+ *#
+
+## Shared variables read by the templates parsed below
+#set($searcher = $request.searcher)
+#set($params = $request.params)
+#set($clusters = $response.response.clusters)
+#set($mltResults = $response.response.get("moreLikeThis"))
+## Value of the annotateBrowse request parameter (footer.vm tests it to pick the annotation link)
+#set($annotate = $params.get("annotateBrowse"))
+#parse('query_form.vm')
+#parse('did_you_mean.vm')
+
+<div class="navigators">
+  #parse("facets.vm")
+</div>
+
+<div class="pagination">
+  #parse("pagination_top.vm")
+</div>
+
+## Show Error Message, if any
+<div class="error">
+  #parse("error.vm")
+</div>
+
+## Render Results, actual matching docs
+<div class="results">
+  #parse("results_list.vm")
+</div>
+
+<div class="pagination">
+  #parse("pagination_bottom.vm")
+</div>
diff --git a/zookeeper/solr/collection1/conf/velocity/cluster.vm b/zookeeper/solr/collection1/conf/velocity/cluster.vm

new file mode 100644 (file)

index 0000000..4957071
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/cluster.vm
@@ -0,0 +1,19 @@
+#**
+ *  Render the "Clusters" heading and a placeholder div, then ask the
+ *  /clustering handler to render cluster_results.vm into that div.
+ *  This template itself performs no check; the placeholder text simply
+ *  stays in place unless the script below replaces it.
+ *#
+
+<h2 #annTitle("Clusters generated by Carrot2 using the /clustering RequestHandler")>
+  Clusters
+</h2>
+
+## Div tag has placeholder text by default
+<div id="clusters">
+  Run Solr with java -Dsolr.clustering.enabled=true -jar start.jar to see results
+</div>
+
+## Replace the div content *if* Carrot^2 is available
+## The request reuses the current query state (#lens) and asks for the
+## velocity writer with the cluster_results template.
+<script type="text/javascript">
+  $('#clusters').load("#url_for_solr/clustering#lens",
+    {'wt':'velocity', 'v.template':"cluster_results"});
+</script>
diff --git a/zookeeper/solr/collection1/conf/velocity/cluster_results.vm b/zookeeper/solr/collection1/conf/velocity/cluster_results.vm

new file mode 100644 (file)

index 0000000..204480d
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/cluster_results.vm
@@ -0,0 +1,31 @@
+#**
+ *  Actual rendering of Clusters
+ *
+ *  For every cluster in the response: a heading built from its comma-separated
+ *  labels, followed by an ordered list of links to its documents (by id).
+ *  Labels and ids come from indexed data, so they are HTML-escaped when shown
+ *  and URL-escaped when placed in the query string.
+ *#
+
+## For each cluster
+#foreach ($cluster in $response.response.clusters)
+
+  #set($labels = $cluster.get('labels'))
+  #set($docs = $cluster.get('docs'))
+
+  ## This Cluster's Heading
+  <h3>
+    #foreach ($label in $labels)
+      ## Keep the following line together to prevent
+      ## a space appearing before each comma
+      $esc.html($label)#if( $foreach.hasNext ),#end
+    #end
+  </h3>
+
+  ## This Cluster's Documents
+  <ol>
+    ## For each doc in this cluster
+    #foreach ($cluDoc in $docs)
+      <li>
+        <a href="#url_for_home?q=id:$esc.url($cluDoc)">
+          $esc.html($cluDoc)</a>
+      </li>
+    #end
+  </ol>
+
+#end   ## end for each Cluster
diff --git a/zookeeper/solr/collection1/conf/velocity/debug.vm b/zookeeper/solr/collection1/conf/velocity/debug.vm

new file mode 100644 (file)

index 0000000..8f6d232
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/debug.vm
@@ -0,0 +1,28 @@
+#**
+ *  Show Debugging Information, if enabled
+ *
+ *  Rendered for the current $doc when the debugQuery request parameter is true:
+ *  a collapsible score explanation and a collapsible dump of all fields.
+ *  Reads $params, $response and $doc from the including template.
+ *  Explain text and field names/values come from indexed data and the query,
+ *  so all of them are HTML-escaped before being written into the page.
+ *#
+
+#if( $params.getBool("debugQuery",false) )
+  <a href="#" onclick='jQuery(this).siblings("pre").toggle(); return false;'>
+    toggle explain</a>
+
+  <pre style="display:none">
+    $esc.html($response.getExplainMap().get($doc.getFirstValue('id')))
+  </pre>
+
+  <a href="#" onclick='jQuery(this).siblings("pre2").toggle(); return false;'>
+    toggle all fields</a>
+
+  ## "pre2" is a custom element name; the toggle link above selects it by that name
+  <pre2 style="display:none">
+    #foreach($fieldname in $doc.fieldNames)
+      <br>
+        <span class="field-name">$esc.html($fieldname) :</span>
+        <span>
+          #foreach($value in $doc.getFieldValues($fieldname))
+            $esc.html($value)
+          #end
+        </span>
+      </br>
+    #end
+  </pre2>
+#end
diff --git a/zookeeper/solr/collection1/conf/velocity/did_you_mean.vm b/zookeeper/solr/collection1/conf/velocity/did_you_mean.vm

new file mode 100644 (file)

index 0000000..b8faaa5
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/did_you_mean.vm
@@ -0,0 +1,9 @@
+#**
+ *  Hyperlinked spelling suggestions in results list
+ *
+ *  When the spellcheck section of the response carries a collation query,
+ *  show it as a link that re-runs the search with that query.
+ *#
+
+## $dym stays unset (and nothing is rendered) when no collation query is present
+#set($dym = $response.response.spellcheck.suggestions.collation.collationQuery)
+#if($dym)
+  Did you mean
+  ## URL-escaped inside the href, HTML-escaped as the link text
+  <a href="#{url_for_home}#{lensNoQ}&q=$esc.url($dym)">$esc.html($dym)</a>?
+#end
diff --git a/zookeeper/solr/collection1/conf/velocity/error.vm b/zookeeper/solr/collection1/conf/velocity/error.vm

new file mode 100644 (file)

index 0000000..80b5819
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/error.vm
@@ -0,0 +1,11 @@
+#**
+ *  Show Error Message, if any
+ *
+ *  Renders the error code and message from the response when an error code
+ *  is present. The message is HTML-escaped because error text can echo parts
+ *  of the request back to the page.
+ *#
+
+## Show Error Message, if any
+## Usually rendered inside div class=error
+
+#if( $response.response.error.code )
+  <h1>ERROR $response.response.error.code</h1>
+  $esc.html($response.response.error.msg)
+#end
diff --git a/zookeeper/solr/collection1/conf/velocity/facet_fields.vm b/zookeeper/solr/collection1/conf/velocity/facet_fields.vm

new file mode 100644 (file)

index 0000000..d9db659
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/facet_fields.vm
@@ -0,0 +1,23 @@
+#**
+ *  Display facets based on field values
+ *  e.g.: fields specified by &facet.field=
+ *
+ *  One list per facet field that has at least one value; each value links to
+ *  the same search filtered by that value and shows its count.
+ *  Field and facet names are HTML-escaped; a facet without a name is shown
+ *  as "missing" instead of an unresolved template reference.
+ *#
+
+#if($response.facetFields)
+  <h2 #annTitle("Facets generated by adding &facet.field= to the request")>
+    Field Facets
+  </h2>
+  #foreach($field in $response.facetFields)
+    ## Hide facets without value
+    #if($field.values.size() > 0)
+      <span class="facet-field">$esc.html($field.name)</span>
+      <ul>
+        #foreach($facet in $field.values)
+          <li>
+            <a href="#url_for_facet_filter($field.name, $facet.name)">#if($facet.name)$esc.html($facet.name)#{else}<em>missing</em>#end</a> ($facet.count)
+          </li>
+        #end
+      </ul>
+    #end  ## end if > 0
+  #end    ## end for each facet field
+#end      ## end if response has facet fields
diff --git a/zookeeper/solr/collection1/conf/velocity/facet_pivot.vm b/zookeeper/solr/collection1/conf/velocity/facet_pivot.vm

new file mode 100644 (file)

index 0000000..7aa50da
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/facet_pivot.vm
@@ -0,0 +1,12 @@
+#**
+ *  Display Pivot-Based Facets
+ *  e.g.: facets specified by &facet.pivot=
+ *
+ *  Reads the facet_pivot section of the response and hands it to the
+ *  display_facet_pivot macro with an empty heading.
+ *#
+
+<h2 #annTitle("Facets generated by adding &facet.pivot= to the request")>
+  Pivot Facets
+</h2>
+
+#set($pivot = $response.response.facet_counts.facet_pivot)
+
+#display_facet_pivot($pivot, "")
diff --git a/zookeeper/solr/collection1/conf/velocity/facet_queries.vm b/zookeeper/solr/collection1/conf/velocity/facet_queries.vm

new file mode 100644 (file)

index 0000000..37489c7
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/facet_queries.vm
@@ -0,0 +1,12 @@
+#**
+ *  Display facets based on specific facet queries
+ *  e.g.: facets specified by &facet.query=
+ *
+ *  Reads the facet_queries section of the response and hands it to the
+ *  display_facet_query macro with two empty string arguments.
+ *#
+
+#set($field = $response.response.facet_counts.facet_queries)
+
+<h2 #annTitle("Facets generated by adding &facet.query= to the request")>
+  Query Facets
+</h2>
+
+#display_facet_query($field, "", "")
diff --git a/zookeeper/solr/collection1/conf/velocity/facet_ranges.vm b/zookeeper/solr/collection1/conf/velocity/facet_ranges.vm

new file mode 100644 (file)

index 0000000..a61084b
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/facet_ranges.vm
@@ -0,0 +1,23 @@
+#**
+ *  Display facets based on ranges of values, AKA "Buckets"
+ *  e.g.: ranges specified by &facet.range=
+ *
+ *  For every range facet with at least one count, unpacks its counts,
+ *  start, end, gap, before and after values and passes them to the
+ *  display_facet_range macro. The field key doubles as the display name.
+ *#
+
+<h2 #annTitle("Facets generated by adding &facet.range= to the request")>
+  Range Facets
+</h2>
+
+#foreach ($field in $response.response.facet_counts.facet_ranges)
+  ## Hide facets without value
+  #if($field.value.counts.size() > 0)
+       #set($name = $field.key)
+       #set($display = $name)
+       #set($f = $field.value.counts)
+       #set($start = $field.value.start)
+       #set($end = $field.value.end)
+       #set($gap = $field.value.gap)
+       #set($before = $field.value.before)
+       #set($after = $field.value.after)
+       #display_facet_range($f, $display, $name, $start, $end, $gap, $before, $after)
+  #end  ## end if has any values
+#end    ## end for each facet range
diff --git a/zookeeper/solr/collection1/conf/velocity/facets.vm b/zookeeper/solr/collection1/conf/velocity/facets.vm

new file mode 100644 (file)

index 0000000..55d40c9
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/facets.vm
@@ -0,0 +1,10 @@
+#**
+ *  Overall Facet display block
+ *  Invokes the 4 facet and 1 cluster template
+ *  (field, query, range and pivot facets, then clusters), in that order.
+ *#
+
+#parse('facet_fields.vm')
+#parse('facet_queries.vm')
+#parse('facet_ranges.vm')
+#parse('facet_pivot.vm')
+#parse('cluster.vm')
diff --git a/zookeeper/solr/collection1/conf/velocity/footer.vm b/zookeeper/solr/collection1/conf/velocity/footer.vm

new file mode 100644 (file)

index 0000000..0604c34
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/footer.vm
@@ -0,0 +1,43 @@
+#**
+ *  Render the bottom section of the page visible to users
+ *
+ *  Contains the option links (toggle debug, toggle annotation, XML output),
+ *  a credit line, documentation links and a disclaimer.
+ *  The "disable" links rebuild the URL from the query and filter queries
+ *  only, so the toggled parameter is left out of the new request.
+ *#
+
+<hr/>
+<div>
+  <span>Options:</span>
+
+  ## Debug toggle: shown as "disable" when debugQuery is present in the request
+  #if($request.params.get('debugQuery'))
+    <a href="#url_for_home?#q#if($list.size($request.params.getParams('fq')) > 0)&#fqs($request.params.getParams('fq'))#end">
+      disable debug</a>
+  #else
+    <a href="#url_for_lens&debugQuery=true&fl=*,score">
+      enable debug</a>
+  #end
+  -
+  ## Annotation toggle: $annotate is read from the template context (set outside this file)
+  #if($annotate)
+    <a href="#url_for_home?#q#if($list.size($request.params.getParams('fq')) > 0)&#fqs($request.params.getParams('fq'))#end#boostPrice">
+      disable annotation</a>
+  #else
+    <a href="#url_for_lens&annotateBrowse=true">
+      enable annotation</a>
+  #end
+  -
+  ## Same search with wt=xml; debugQuery is carried over when it is set
+  <a #annTitle("Click to switch to an XML response: &wt=xml") href="#url_for_lens&wt=xml#if($request.params.get('debugQuery'))&debugQuery=true#end">
+    XML results</a>
+
+</div>
+
+<div>
+  Generated by <a href="http://wiki.apache.org/solr/VelocityResponseWriter">VelocityResponseWriter</a>
+</div>
+<div>
+  <span>Documentation: </span>
+  <a href="http://lucene.apache.org/solr">Solr Home Page</a>, <a href="http://wiki.apache.org/solr">
+    Solr Wiki</a>
+  </div>
+<div>
+  Disclaimer:
+  The locations displayed in this demonstration are purely fictional.
+  It is more than likely that no store with the items listed actually
+  exists at that location!
+</div>
diff --git a/zookeeper/solr/collection1/conf/velocity/head.vm b/zookeeper/solr/collection1/conf/velocity/head.vm

new file mode 100644 (file)

index 0000000..d1f6ee6
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/head.vm
@@ -0,0 +1,35 @@
+#**
+ *  Provide elements for the <head> section of the HTML document
+ *
+ *  Emits the page title, the jQuery and autocomplete script/style includes,
+ *  and an inline script that attaches autocomplete to the query input.
+ *  Suggestions are requested from the /terms handler and rendered through
+ *  the "suggest" velocity template.
+ *#
+
+  ## An example of using an arbitrary request parameter
+  <title>#param('title')</title>
+  <meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
+
+  ## jQuery is loaded from the web root; the other assets are served through /admin/file
+  <script type="text/javascript" src="#{url_root}/js/lib/jquery-1.7.2.min.js"></script>
+  <link rel="stylesheet" type="text/css" href="#{url_for_solr}/admin/file?file=/velocity/main.css&contentType=text/css"/>
+  <link rel="stylesheet" href="#{url_for_solr}/admin/file?file=/velocity/jquery.autocomplete.css&contentType=text/css" type="text/css" />
+  <script type="text/javascript" src="#{url_for_solr}/admin/file?file=/velocity/jquery.autocomplete.js&contentType=text/javascript"></script>
+
+
+    <script>
+    $(document).ready(function(){
+      $("\#q").autocomplete('#{url_for_solr}/terms', {  ## backslash escaped #q as that is a macro defined in VM_global_library.vm
+           extraParams:{
+             'terms.prefix': function() { return $("\#q").val();},
+             'terms.sort': 'count',
+             'terms.fl': 'name',
+             'wt': 'velocity',
+             'v.template': 'suggest'
+           }
+         }
+      ).keydown(function(e){
+        ## keyCode 13 is the Return key: submit the search form
+        if (e.keyCode === 13){
+          $("#query-form").trigger('submit');
+        }
+      });
+
+      // http://localhost:8983/solr/collection1/terms?terms.fl=name&terms.prefix=i&terms.sort=count&wt=velocity&v.template=suggest
+    });
+
+    </script>
diff --git a/zookeeper/solr/collection1/conf/velocity/header.vm b/zookeeper/solr/collection1/conf/velocity/header.vm

new file mode 100644 (file)

index 0000000..6866047
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/header.vm
@@ -0,0 +1,7 @@
+#**
+ *  Render the top section of the page visible to users
+ *
+ *  Shows the Solr logo as a link to the browse home page; the link keeps
+ *  debugQuery=true when that parameter is set on the current request.
+ *#
+
+<div id="head">
+  <span ><a href="#url_for_home#if($request.params.get('debugQuery'))?debugQuery=true#end"><img src="#{url_root}/img/solr.png" id="logo"/></a></span>
+</div>
diff --git a/zookeeper/solr/collection1/conf/velocity/hit.vm b/zookeeper/solr/collection1/conf/velocity/hit.vm

new file mode 100644 (file)

index 0000000..a9c11f4
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/hit.vm
@@ -0,0 +1,25 @@
+#**
+ *  Called for each matching document but then
+ *  calls one of product_doc, join_doc or richtext_doc
+ *  depending on which fields the doc has
+ *
+ *  Expects $doc in the context and sets $docId from its "id" field
+ *  (the field macro reads $docId for highlighting lookups).
+ *#
+
+#set($docId = $doc.getFieldValue('id'))
+
+<div class="result-document">
+
+  ## Has a "name" field ?
+  #if($doc.getFieldValue('name'))
+    #parse("product_doc.vm")
+
+  ## Has a "compName_s" field ?
+  #elseif($doc.getFieldValue('compName_s'))
+    #parse("join_doc.vm")
+
+  ## Fallback to richtext_doc
+  #else
+    #parse("richtext_doc.vm")
+
+  #end
+
+</div>
diff --git a/zookeeper/solr/collection1/conf/velocity/hit_grouped.vm b/zookeeper/solr/collection1/conf/velocity/hit_grouped.vm

new file mode 100644 (file)

index 0000000..5297f1e
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/hit_grouped.vm
@@ -0,0 +1,43 @@
+#**
+ *  Display grouped results
+ *
+ *  Expects $grouping in the context. Shows the grouping key, the total
+ *  match count, and then for every group its value, its document count and
+ *  its documents. Each document is rendered with the same template choice
+ *  as hit.vm (product_doc, join_doc or richtext_doc).
+ *#
+
+<div class="result-document">
+
+  <div class="result-title">
+    <b>$grouping.key</b>
+  </div>
+
+  <div>
+    Total Matches in Group: $grouping.value.matches
+  </div>
+
+  <div>  ## list of groups
+
+    #foreach ($group in $grouping.value.groups)
+      <div class="group-value">
+        ## Groups without a value are labelled "No group"
+        #if($group.groupValue)$group.groupValue#{else}<i>No group</i>#end
+        <span #annTitle("The count of the number of documents in this group")>
+          ($group.doclist.numFound)
+        </span>
+      </div>
+
+      <div class="group-doclist"
+        #annTitle("Contains the top scoring documents in the group")
+      >
+        #foreach ($doc in $group.doclist)
+          ## $docId is set per document, as hit.vm does for ungrouped results
+          #set($docId = $doc.getFieldValue('id'))
+          #if($doc.getFieldValue('name'))
+            #parse("product_doc.vm")
+          #elseif($doc.getFieldValue('compName_s'))
+            #parse("join_doc.vm")
+          #else
+            #parse("richtext_doc.vm")
+          #end
+        #end
+      </div>
+
+    #end  ## end of foreach group in grouping.value.groups
+  </div>  ## div tag for entire list of groups
+
+</div>  ## end of div class=result-document
diff --git a/zookeeper/solr/collection1/conf/velocity/hit_plain.vm b/zookeeper/solr/collection1/conf/velocity/hit_plain.vm

new file mode 100644 (file)

index 0000000..193439b
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/hit_plain.vm
@@ -0,0 +1,25 @@
+#**
+ *  An extremely plain / debug version of hit.vm
+ *
+ *  Renders every field of $doc as table rows: one row per value, with the
+ *  field name shown only on the first row of that field. Values are
+ *  HTML-escaped.
+ *#
+
+<table>
+  ## For each field
+  #foreach( $fieldName in $doc.fieldNames )
+    ## For each value
+    #foreach( $value in $doc.getFieldValues($fieldName) )
+      <tr>
+        ## Field Name
+        <th align="right" valign="top">
+          ## Only label the first value of a multi-valued field
+          #if( $foreach.count == 1 )
+            $fieldName:
+          #end
+        </th>
+        ## Field Value(s)
+        <td align="left" valign="top">
+          $esc.html($value) <br/>
+        </td>
+      </tr>
+    #end     ## end for each value
+  #end       ## end for each field
+</table>
+<hr/>
diff --git a/zookeeper/solr/collection1/conf/velocity/join_doc.vm b/zookeeper/solr/collection1/conf/velocity/join_doc.vm

new file mode 100644 (file)

index 0000000..9956012
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/join_doc.vm
@@ -0,0 +1,20 @@
+#**
+ *  Display documents that are joined to other documents
+ *
+ *  Shows the compName_s field as the title, then the id and address_s
+ *  fields through the field macro, followed by the debug section.
+ *#
+
+<div class="result-title">
+  <b>#field('compName_s')</b>
+</div>
+
+<div>
+  Id: #field('id')
+  (company-details document for
+    <a href="http://wiki.apache.org/solr/Join" target="_new">join</a>
+  )
+</div>
+
+<div>
+  Address: #field('address_s')
+</div>
+
+## Per-document debug output (debug.vm renders it only when debugQuery is enabled)
+#parse('debug.vm')
diff --git a/zookeeper/solr/collection1/conf/velocity/jquery.autocomplete.css b/zookeeper/solr/collection1/conf/velocity/jquery.autocomplete.css

new file mode 100644 (file)

index 0000000..91b6228
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/jquery.autocomplete.css
@@ -0,0 +1,48 @@
+/* Styles for the autocomplete suggestion list (.ac_* classes) */
+
+/* Container of the suggestion list */
+.ac_results {
+       padding: 0px;
+       border: 1px solid black;
+       background-color: white;
+       overflow: hidden;
+       z-index: 99999;
+}
+
+/* The list itself: no bullets, no spacing */
+.ac_results ul {
+       width: 100%;
+       list-style-position: outside;
+       list-style: none;
+       padding: 0;
+       margin: 0;
+}
+
+/* A single suggestion row */
+.ac_results li {
+       margin: 0px;
+       padding: 2px 5px;
+       cursor: default;
+       display: block;
+       /*
+       with a width of 100% a horizontal scrollbar appears
+       when scroll mode is used
+       */
+       /*width: 100%;*/
+       font: menu;
+       font-size: 12px;
+       /*
+       this is very important: if line-height is not set, or is set
+       in relative units, scrolling breaks in Firefox
+       */
+       line-height: 16px;
+       overflow: hidden;
+}
+
+/* Spinner image shown at the right edge of the element carrying this class */
+.ac_loading {
+       background: white url('indicator.gif') right center no-repeat;
+}
+
+/* Alternate row shading */
+.ac_odd {
+       background-color: #eee;
+}
+
+/* Highlight colors (NOTE(review): presumably the hovered/active row -- confirm in the plugin's Select code) */
+.ac_over {
+       background-color: #0A246A;
+       color: white;
+}
diff --git a/zookeeper/solr/collection1/conf/velocity/jquery.autocomplete.js b/zookeeper/solr/collection1/conf/velocity/jquery.autocomplete.js

new file mode 100644 (file)

index 0000000..09bb376
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/jquery.autocomplete.js
@@ -0,0 +1,763 @@
+/*
+ * Autocomplete - jQuery plugin 1.1pre
+ *
+ * Copyright (c) 2007 Dylan Verheul, Dan G. Switzer, Anjesh Tuladhar, Jörn Zaefferer
+ *
+ * Dual licensed under the MIT and GPL licenses:
+ *   http://www.opensource.org/licenses/mit-license.php
+ *   http://www.gnu.org/licenses/gpl.html
+ *
+ * Revision: $Id: jquery.autocomplete.js 5785 2008-07-12 10:37:33Z joern.zaefferer $
+ *
+ */
+
+;(function($) {
+       
+$.fn.extend({
+       autocomplete: function(urlOrData, options) {
+               var isUrl = typeof urlOrData == "string";
+               options = $.extend({}, $.Autocompleter.defaults, {
+                       url: isUrl ? urlOrData : null,
+                       data: isUrl ? null : urlOrData,
+                       delay: isUrl ? $.Autocompleter.defaults.delay : 10,
+                       max: options && !options.scroll ? 10 : 150
+               }, options);
+               
+               // if highlight is set to false, replace it with a do-nothing function
+               options.highlight = options.highlight || function(value) { return value; };
+               
+               // if the formatMatch option is not specified, then use formatItem for backwards compatibility
+               options.formatMatch = options.formatMatch || options.formatItem;
+               
+               return this.each(function() {
+                       new $.Autocompleter(this, options);
+               });
+       },
+       result: function(handler) {
+               return this.bind("result", handler);
+       },
+       search: function(handler) {
+               return this.trigger("search", [handler]);
+       },
+       flushCache: function() {
+               return this.trigger("flushCache");
+       },
+       setOptions: function(options){
+               return this.trigger("setOptions", [options]);
+       },
+       unautocomplete: function() {
+               return this.trigger("unautocomplete");
+       }
+});
+
+$.Autocompleter = function(input, options) {
+
+       var KEY = {
+               UP: 38,
+               DOWN: 40,
+               DEL: 46,
+               TAB: 9,
+               RETURN: 13,
+               ESC: 27,
+               COMMA: 188,
+               PAGEUP: 33,
+               PAGEDOWN: 34,
+               BACKSPACE: 8
+       };
+
+       // Create $ object for input element
+       var $input = $(input).attr("autocomplete", "off").addClass(options.inputClass);
+
+       var timeout;
+       var previousValue = "";
+       var cache = $.Autocompleter.Cache(options);
+       var hasFocus = 0;
+       var lastKeyPressCode;
+       var config = {
+               mouseDownOnSelect: false
+       };
+       var select = $.Autocompleter.Select(options, input, selectCurrent, config);
+       
+       var blockSubmit;
+       
+       // prevent form submit in opera when selecting with return key
+       $.browser.opera && $(input.form).bind("submit.autocomplete", function() {
+               if (blockSubmit) {
+                       blockSubmit = false;
+                       return false;
+               }
+       });
+       
+       // only opera doesn't trigger keydown multiple times while pressed, others don't work with keypress at all
+       $input.bind(($.browser.opera ? "keypress" : "keydown") + ".autocomplete", function(event) {
+               // track last key pressed
+               lastKeyPressCode = event.keyCode;
+               switch(event.keyCode) {
+               
+                       case KEY.UP:
+                               event.preventDefault();
+                               if ( select.visible() ) {
+                                       select.prev();
+                               } else {
+                                       onChange(0, true);
+                               }
+                               break;
+                               
+                       case KEY.DOWN:
+                               event.preventDefault();
+                               if ( select.visible() ) {
+                                       select.next();
+                               } else {
+                                       onChange(0, true);
+                               }
+                               break;
+                               
+                       case KEY.PAGEUP:
+                               event.preventDefault();
+                               if ( select.visible() ) {
+                                       select.pageUp();
+                               } else {
+                                       onChange(0, true);
+                               }
+                               break;
+                               
+                       case KEY.PAGEDOWN:
+                               event.preventDefault();
+                               if ( select.visible() ) {
+                                       select.pageDown();
+                               } else {
+                                       onChange(0, true);
+                               }
+                               break;
+                       
+                       // matches also semicolon
+                       case options.multiple && $.trim(options.multipleSeparator) == "," && KEY.COMMA:
+                       case KEY.TAB:
+                       case KEY.RETURN:
+                               if( selectCurrent() ) {
+                                       // stop default to prevent a form submit, Opera needs special handling
+                                       event.preventDefault();
+                                       blockSubmit = true;
+                                       return false;
+                               }
+                               break;
+                               
+                       case KEY.ESC:
+                               select.hide();
+                               break;
+                               
+                       default:
+                               clearTimeout(timeout);
+                               timeout = setTimeout(onChange, options.delay);
+                               break;
+               }
+       }).focus(function(){
+               // track whether the field has focus, we shouldn't process any
+               // results if the field no longer has focus
+               hasFocus++;
+       }).blur(function() {
+               hasFocus = 0;
+               if (!config.mouseDownOnSelect) {
+                       hideResults();
+               }
+       }).click(function() {
+               // show select when clicking in a focused field
+               if ( hasFocus++ > 1 && !select.visible() ) {
+                       onChange(0, true);
+               }
+       }).bind("search", function() {
+               // TODO why not just specifying both arguments?
+               var fn = (arguments.length > 1) ? arguments[1] : null;
+               function findValueCallback(q, data) {
+                       var result;
+                       if( data && data.length ) {
+                               for (var i=0; i < data.length; i++) {
+                                       if( data[i].result.toLowerCase() == q.toLowerCase() ) {
+                                               result = data[i];
+                                               break;
+                                       }
+                               }
+                       }
+                       if( typeof fn == "function" ) fn(result);
+                       else $input.trigger("result", result && [result.data, result.value]);
+               }
+               $.each(trimWords($input.val()), function(i, value) {
+                       request(value, findValueCallback, findValueCallback);
+               });
+       }).bind("flushCache", function() {
+               cache.flush();
+       }).bind("setOptions", function() {
+               $.extend(options, arguments[1]);
+               // if we've updated the data, repopulate
+               if ( "data" in arguments[1] )
+                       cache.populate();
+       }).bind("unautocomplete", function() {
+               select.unbind();
+               $input.unbind();
+               $(input.form).unbind(".autocomplete");
+       });
+       
+       
+       // Writes the currently selected suggestion into the input field.
+       // Returns false when nothing is selected; otherwise hides the list,
+       // fires the "result" event with the selection and returns true.
+       function selectCurrent() {
+               var selected = select.selected();
+               if( !selected )
+                       return false;
+               
+               var v = selected.result;
+               // remember the value so onChange() sees it as already handled
+               previousValue = v;
+               
+               if ( options.multiple ) {
+                       var words = trimWords($input.val());
+                       if ( words.length > 1 ) {
+                               // keep all words but the last one and append the selection in its place
+                               v = words.slice(0, words.length - 1).join( options.multipleSeparator ) + options.multipleSeparator + v;
+                       }
+                       // trailing separator so the next word can be typed right away
+                       v += options.multipleSeparator;
+               }
+               
+               $input.val(v);
+               hideResultsNow();
+               $input.trigger("result", [selected.data, selected.value]);
+               return true;
+       }
+       
+       // Reacts to a (possible) change of the input value by requesting suggestions.
+       // crap: unused first argument; callers in this file pass 0 or nothing.
+       // skipPrevCheck: when true, a lookup happens even if the value did not change.
+       function onChange(crap, skipPrevCheck) {
+               // the DEL key only hides the list, no lookup is made
+               if( lastKeyPressCode == KEY.DEL ) {
+                       select.hide();
+                       return;
+               }
+               
+               var currentValue = $input.val();
+               
+               if ( !skipPrevCheck && currentValue == previousValue )
+                       return;
+               
+               previousValue = currentValue;
+               
+               // only the word being typed is looked up
+               currentValue = lastWord(currentValue);
+               if ( currentValue.length >= options.minChars) {
+                       $input.addClass(options.loadingClass);
+                       if (!options.matchCase)
+                               currentValue = currentValue.toLowerCase();
+                       request(currentValue, receiveData, hideResultsNow);
+               } else {
+                       // too short to look up: clear the loading state and hide the list
+                       stopLoading();
+                       select.hide();
+               }
+       };
+       
+       function trimWords(value) {
+               if ( !value ) {
+                       return [""];
+               }
+               var words = value.split( options.multipleSeparator );
+               var result = [];
+               $.each(words, function(i, value) {
+                       if ( $.trim(value) )
+                               result[i] = $.trim(value);
+               });
+               return result;
+       }
+       
+       function lastWord(value) {
+               if ( !options.multiple )
+                       return value;
+               var words = trimWords(value);
+               return words[words.length - 1];
+       }
+       
+       // fills in the input box w/the first match (assumed to be the best match)
+       // q: the term entered
+       // sValue: the first matching result
+       // Does nothing unless options.autoFill is set.
+       function autoFill(q, sValue){
+               // autofill in the complete box w/the first match as long as the user hasn't entered in more data
+               // if the last user key pressed was backspace, don't autofill
+               if( options.autoFill && (lastWord($input.val()).toLowerCase() == q.toLowerCase()) && lastKeyPressCode != KEY.BACKSPACE ) {
+                       // fill in the value (keep the case the user has typed)
+                       $input.val($input.val() + sValue.substring(lastWord(previousValue).length));
+                       // select the portion of the value not typed by the user (so the next character will erase)
+                       $.Autocompleter.Selection(input, previousValue.length, previousValue.length + sValue.length);
+               }
+       };
+
+       function hideResults() {
+               clearTimeout(timeout);
+               timeout = setTimeout(hideResultsNow, 200);
+       };
+
+       // Hides the suggestion list immediately, cancels the pending timer and
+       // clears the loading state. With options.mustMatch set, the input is
+       // validated through a search and cleared when no match comes back.
+       function hideResultsNow() {
+               // remember visibility before hiding; it decides the cursor move below
+               var wasVisible = select.visible();
+               select.hide();
+               clearTimeout(timeout);
+               stopLoading();
+               if (options.mustMatch) {
+                       // call search and run callback
+                       $input.search(
+                               function (result){
+                                       // if no value found, clear the input box
+                                       if( !result ) {
+                                               if (options.multiple) {
+                                                       // drop only the last (unmatched) word and keep the earlier ones
+                                                       var words = trimWords($input.val()).slice(0, -1);
+                                                       $input.val( words.join(options.multipleSeparator) + (words.length ? options.multipleSeparator : "") );
+                                               }
+                                               else
+                                                       $input.val( "" );
+                                       }
+                               }
+                       );
+               }
+               if (wasVisible)
+                       // position cursor at end of input field
+                       $.Autocompleter.Selection(input, input.value.length, input.value.length);
+       };
+
+       function receiveData(q, data) {
+               if ( data && data.length && hasFocus ) {
+                       stopLoading();
+                       select.display(data, q);
+                       autoFill(q, data[0].value);
+                       select.show();
+               } else {
+                       hideResultsNow();
+               }
+       };
+
+       // Looks up suggestions for term.
+       // success(term, data) is called with the parsed rows of the AJAX response.
+       // failure(term) is called when no url is configured.
+       // The $.ajax call below sets no error handler, so a failed request calls neither callback.
+       function request(term, success, failure) {
+               if (!options.matchCase)
+                       term = term.toLowerCase();
+               var data = cache.load(term);
+               data = null; // Avoid buggy cache and go to Solr every time 
+               // receive the cached data (never taken while data is forced to null above)
+               if (data && data.length) {
+                       success(term, data);
+               // if an AJAX url has been supplied, try loading the data now
+               } else if( (typeof options.url == "string") && (options.url.length > 0) ){
+                       
+                       // timestamp differs on every request; the remaining parameters come from options.extraParams
+                       var extraParams = {
+                               timestamp: +new Date()
+                       };
+                       $.each(options.extraParams, function(key, param) {
+                               // function-valued parameters are evaluated at request time
+                               extraParams[key] = typeof param == "function" ? param() : param;
+                       });
+                       
+                       $.ajax({
+                               // try to leverage ajaxQueue plugin to abort previous requests
+                               mode: "abort",
+                               // limit abortion to this input
+                               port: "autocomplete" + input.name,
+                               dataType: options.dataType,
+                               url: options.url,
+                               data: $.extend({
+                                       q: lastWord(term),
+                                       limit: options.max
+                               }, extraParams),
+                               success: function(data) {
+                                       // a custom options.parse wins when it returns a truthy value
+                                       var parsed = options.parse && options.parse(data) || parse(data);
+                                       cache.add(term, parsed);
+                                       success(term, parsed);
+                               }
+                       });
+               } else {
+                       // if we have a failure, we need to empty the list -- this prevents the [TAB] key from selecting the last successful match
+                       select.emptyList();
+                       failure(term);
+               }
+       };
+       
+       function parse(data) {
+               var parsed = [];
+               var rows = data.split("\n");
+               for (var i=0; i < rows.length; i++) {
+                       var row = $.trim(rows[i]);
+                       if (row) {
+                               row = row.split("|");
+                               parsed[parsed.length] = {
+                                       data: row,
+                                       value: row[0],
+                                       result: options.formatResult && options.formatResult(row, row[0]) || row[0]
+                               };
+                       }
+               }
+               return parsed;
+       };
+
+       function stopLoading() {
+               $input.removeClass(options.loadingClass);
+       };
+
+};
+
+$.Autocompleter.defaults = {
+       inputClass: "ac_input",
+       resultsClass: "ac_results",
+       loadingClass: "ac_loading",
+       minChars: 1,
+       delay: 400,
+       matchCase: false,
+       matchSubset: true,
+       matchContains: false,
+       cacheLength: 10,
+       max: 100,
+       mustMatch: false,
+       extraParams: {},
+       selectFirst: false,
+       formatItem: function(row) { return row[0]; },
+       formatMatch: null,
+       autoFill: false,
+       width: 0,
+       multiple: false,
+       multipleSeparator: ", ",
+       highlight: function(value, term) {
+               return value.replace(new RegExp("(?![^&;]+;)(?!<[^<>]*)(" + term.replace(/([\^\$\(\)\[\]\{\}\*\.\+\?\|\\])/gi, "\\$1") + ")(?![^<>]*>)(?![^&;]+;)", "gi"), "<strong>$1</strong>");
+       },
+    scroll: true,
+    scrollHeight: 180
+};
+
+$.Autocompleter.Cache = function(options) {
+
+       var data = {};
+       var length = 0;
+       
+       function matchSubset(s, sub) {
+               if (!options.matchCase) 
+                       s = s.toLowerCase();
+               var i = s.indexOf(sub);
+               if (options.matchContains == "word"){
+                       i = s.toLowerCase().search("\\b" + sub.toLowerCase());
+               }
+               if (i == -1) return false;
+               return i == 0 || options.matchContains;
+       };
+       
+       function add(q, value) {
+               if (length > options.cacheLength){
+                       flush();
+               }
+               if (!data[q]){ 
+                       length++;
+               }
+               data[q] = value;
+       }
+       
+       function populate(){
+               if( !options.data ) return false;
+               // track the matches
+               var stMatchSets = {},
+                       nullData = 0;
+
+               // no url was specified, we need to adjust the cache length to make sure it fits the local data store
+               if( !options.url ) options.cacheLength = 1;
+               
+               // track all options for minChars = 0
+               stMatchSets[""] = [];
+               
+               // loop through the array and create a lookup structure
+               for ( var i = 0, ol = options.data.length; i < ol; i++ ) {
+                       var rawValue = options.data[i];
+                       // if rawValue is a string, make an array otherwise just reference the array
+                       rawValue = (typeof rawValue == "string") ? [rawValue] : rawValue;
+                       
+                       var value = options.formatMatch(rawValue, i+1, options.data.length);
+                       if ( value === false )
+                               continue;
+                               
+                       var firstChar = value.charAt(0).toLowerCase();
+                       // if no lookup array for this character exists, look it up now
+                       if( !stMatchSets[firstChar] ) 
+                               stMatchSets[firstChar] = [];
+
+                       // if the match is a string
+                       var row = {
+                               value: value,
+                               data: rawValue,
+                               result: options.formatResult && options.formatResult(rawValue) || value
+                       };
+                       
+                       // push the current match into the set list
+                       stMatchSets[firstChar].push(row);
+
+                       // keep track of minChars zero items
+                       if ( nullData++ < options.max ) {
+                               stMatchSets[""].push(row);
+                       }
+               };
+
+               // add the data items to the cache
+               $.each(stMatchSets, function(i, value) {
+                       // increase the cache size
+                       options.cacheLength++;
+                       // add to the cache
+                       add(i, value);
+               });
+       }
+       
+       // populate any existing data
+       setTimeout(populate, 25);
+       
+       function flush(){
+               data = {};
+               length = 0;
+       }
+       
+       return {
+               flush: flush,
+               add: add,
+               populate: populate,
+               load: function(q) {
+                       if (!options.cacheLength || !length)
+                               return null;
+                       /* 
+                        * if dealing w/local data and matchContains than we must make sure
+                        * to loop through all the data collections looking for matches
+                        */
+                       if( !options.url && options.matchContains ){
+                               // track all matches
+                               var csub = [];
+                               // loop through all the data grids for matches
+                               for( var k in data ){
+                                       // don't search through the stMatchSets[""] (minChars: 0) cache
+                                       // this prevents duplicates
+                                       if( k.length > 0 ){
+                                               var c = data[k];
+                                               $.each(c, function(i, x) {
+                                                       // if we've got a match, add it to the array
+                                                       if (matchSubset(x.value, q)) {
+                                                               csub.push(x);
+                                                       }
+                                               });
+                                       }
+                               }                               
+                               return csub;
+                       } else 
+                       // if the exact item exists, use it
+                       if (data[q]){
+                               return data[q];
+                       } else
+                       if (options.matchSubset) {
+                               for (var i = q.length - 1; i >= options.minChars; i--) {
+                                       var c = data[q.substr(0, i)];
+                                       if (c) {
+                                               var csub = [];
+                                               $.each(c, function(i, x) {
+                                                       if (matchSubset(x.value, q)) {
+                                                               csub[csub.length] = x;
+                                                       }
+                                               });
+                                               return csub;
+                                       }
+                               }
+                       }
+                       return null;
+               }
+       };
+};
+
+$.Autocompleter.Select = function (options, input, select, config) {
+       var CLASSES = {
+               ACTIVE: "ac_over"
+       };
+       
+       var listItems,
+               active = -1,
+               data,
+               term = "",
+               needsInit = true,
+               element,
+               list;
+       
+       // Create results
+       function init() {
+               if (!needsInit)
+                       return;
+               element = $("<div/>")
+               .hide()
+               .addClass(options.resultsClass)
+               .css("position", "absolute")
+               .appendTo(document.body);
+       
+               list = $("<ul/>").appendTo(element).mouseover( function(event) {
+                       if(target(event).nodeName && target(event).nodeName.toUpperCase() == 'LI') {
+                   active = $("li", list).removeClass(CLASSES.ACTIVE).index(target(event));
+                           $(target(event)).addClass(CLASSES.ACTIVE);            
+               }
+               }).click(function(event) {
+                       $(target(event)).addClass(CLASSES.ACTIVE);
+                       select();
+                       // TODO provide option to avoid setting focus again after selection? useful for cleanup-on-focus
+                       input.focus();
+                       return false;
+               }).mousedown(function() {
+                       config.mouseDownOnSelect = true;
+               }).mouseup(function() {
+                       config.mouseDownOnSelect = false;
+               });
+               
+               if( options.width > 0 )
+                       element.css("width", options.width);
+                       
+               needsInit = false;
+       } 
+       
+       function target(event) {
+               var element = event.target;
+               while(element && element.tagName != "LI")
+                       element = element.parentNode;
+               // more fun with IE, sometimes event.target is empty, just ignore it then
+               if(!element)
+                       return [];
+               return element;
+       }
+
+       function moveSelect(step) {
+               listItems.slice(active, active + 1).removeClass(CLASSES.ACTIVE);
+               movePosition(step);
+        var activeItem = listItems.slice(active, active + 1).addClass(CLASSES.ACTIVE);
+        if(options.scroll) {
+            var offset = 0;
+            listItems.slice(0, active).each(function() {
+                               offset += this.offsetHeight;
+                       });
+            if((offset + activeItem[0].offsetHeight - list.scrollTop()) > list[0].clientHeight) {
+                list.scrollTop(offset + activeItem[0].offsetHeight - list.innerHeight());
+            } else if(offset < list.scrollTop()) {
+                list.scrollTop(offset);
+            }
+        }
+       };
+       
+       function movePosition(step) {
+               active += step;
+               if (active < 0) {
+                       active = listItems.size() - 1;
+               } else if (active >= listItems.size()) {
+                       active = 0;
+               }
+       }
+       
+       function limitNumberOfItems(available) {
+               return options.max && options.max < available
+                       ? options.max
+                       : available;
+       }
+       
+       function fillList() {
+               list.empty();
+               var max = limitNumberOfItems(data.length);
+               for (var i=0; i < max; i++) {
+                       if (!data[i])
+                               continue;
+                       var formatted = options.formatItem(data[i].data, i+1, max, data[i].value, term);
+                       if ( formatted === false )
+                               continue;
+                       var li = $("<li/>").html( options.highlight(formatted, term) ).addClass(i%2 == 0 ? "ac_even" : "ac_odd").appendTo(list)[0];
+                       $.data(li, "ac_data", data[i]);
+               }
+               listItems = list.find("li");
+               if ( options.selectFirst ) {
+                       listItems.slice(0, 1).addClass(CLASSES.ACTIVE);
+                       active = 0;
+               }
+               // apply bgiframe if available
+               if ( $.fn.bgiframe )
+                       list.bgiframe();
+       }
+       
+       return {
+               display: function(d, q) {
+                       init();
+                       data = d;
+                       term = q;
+                       fillList();
+               },
+               next: function() {
+                       moveSelect(1);
+               },
+               prev: function() {
+                       moveSelect(-1);
+               },
+               pageUp: function() {
+                       if (active != 0 && active - 8 < 0) {
+                               moveSelect( -active );
+                       } else {
+                               moveSelect(-8);
+                       }
+               },
+               pageDown: function() {
+                       if (active != listItems.size() - 1 && active + 8 > listItems.size()) {
+                               moveSelect( listItems.size() - 1 - active );
+                       } else {
+                               moveSelect(8);
+                       }
+               },
+               hide: function() {
+                       element && element.hide();
+                       listItems && listItems.removeClass(CLASSES.ACTIVE);
+                       active = -1;
+               },
+               visible : function() {
+                       return element && element.is(":visible");
+               },
+               current: function() {
+                       return this.visible() && (listItems.filter("." + CLASSES.ACTIVE)[0] || options.selectFirst && listItems[0]);
+               },
+               show: function() {
+                       var offset = $(input).offset();
+                       element.css({
+                               width: typeof options.width == "string" || options.width > 0 ? options.width : $(input).width(),
+                               top: offset.top + input.offsetHeight,
+                               left: offset.left
+                       }).show();
+            if(options.scroll) {
+                list.scrollTop(0);
+                list.css({
+                                       maxHeight: options.scrollHeight,
+                                       overflow: 'auto'
+                               });
+                               
+                if($.browser.msie && typeof document.body.style.maxHeight === "undefined") {
+                                       var listHeight = 0;
+                                       listItems.each(function() {
+                                               listHeight += this.offsetHeight;
+                                       });
+                                       var scrollbarsVisible = listHeight > options.scrollHeight;
+                    list.css('height', scrollbarsVisible ? options.scrollHeight : listHeight );
+                                       if (!scrollbarsVisible) {
+                                               // IE doesn't recalculate width when scrollbar disappears
+                                               listItems.width( list.width() - parseInt(listItems.css("padding-left")) - parseInt(listItems.css("padding-right")) );
+                                       }
+                }
+                
+            }
+               },
+               selected: function() {
+                       var selected = listItems && listItems.filter("." + CLASSES.ACTIVE).removeClass(CLASSES.ACTIVE);
+                       return selected && selected.length && $.data(selected[0], "ac_data");
+               },
+               emptyList: function (){
+                       list && list.empty();
+               },
+               unbind: function() {
+                       element && element.remove();
+               }
+       };
+};
+
+$.Autocompleter.Selection = function(field, start, end) {
+       if( field.createTextRange ){
+               var selRange = field.createTextRange();
+               selRange.collapse(true);
+               selRange.moveStart("character", start);
+               selRange.moveEnd("character", end);
+               selRange.select();
+       } else if( field.setSelectionRange ){
+               field.setSelectionRange(start, end);
+       } else {
+               if( field.selectionStart ){
+                       field.selectionStart = start;
+                       field.selectionEnd = end;
+               }
+       }
+       field.focus();
+};
+
+})(jQuery);
\ No newline at end of file
diff --git a/zookeeper/solr/collection1/conf/velocity/layout.vm b/zookeeper/solr/collection1/conf/velocity/layout.vm

new file mode 100644 (file)

index 0000000..50f4c1b
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/layout.vm
@@ -0,0 +1,24 @@
+#**
+ *  Overall HTML page layout
+ *#
+
+<html>
+<head>
+  #parse("head.vm")
+</head>
+  <body>
+    <div id="admin"><a href="#url_root/#/#core_name">Solr Admin</a></div>
+    <div id="header">
+      #parse("header.vm")
+    </div>
+    <div id="tabs">
+      #parse("tabs.vm")
+    </div>
+    <div id="content">
+      $content
+    </div>
+    <div id="footer">
+      #parse("footer.vm")
+    </div>
+  </body>
+</html>
diff --git a/zookeeper/solr/collection1/conf/velocity/main.css b/zookeeper/solr/collection1/conf/velocity/main.css

new file mode 100644 (file)

index 0000000..0aed533
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/main.css
@@ -0,0 +1,230 @@
+#admin{
+  text-align: right;
+  vertical-align: top; 
+}
+
+#head{
+  width: 100%;
+}
+.array-field {
+  border: 2px solid #474747;
+  background: #FFE9D8;
+  padding: 5px;
+  margin: 5px;
+}
+
+.array-field-list li {
+  list-style: circle;
+  margin-left: 20px;
+}
+
+.parsed_query_header {
+  font-family: Helvetica, Arial, sans-serif;
+  font-size: 10pt;
+  font-weight: bold;
+}
+
+.parsed_query {
+  font-family: Courier, "Courier New", monospace; /* generic fallback keyword is "monospace" */
+  font-size: 10pt;
+  font-weight: normal;
+}
+
+body {
+  font-family: Helvetica, Arial, sans-serif;
+  font-size: 10pt;
+}
+
+a {
+  color: #43a4b1;
+}
+
+.navigators {
+  float: left;
+  margin: 5px;
+  margin-top: 0px;
+  width: 185px;
+  padding: 5px;
+  top: -20px;
+  position: relative;  
+}
+
+.tabs-bar {
+  padding: 5px;
+  width: 100%;
+  border: 1px solid;
+  border-width: 0px 0px 1px 0px;
+}
+.tab {
+  font-weight: bold;
+  padding: 5px;
+  margin: 0px 5px;
+  border: 1px solid;
+  background-color: #dddddd;
+  border-top-left-radius: 4px;
+  border-top-right-radius: 4px;
+}
+.tab:hover {
+  background: #FEC293;
+}
+.tab.selected {
+  background-color: #ffffff;
+  border-bottom: 1px solid #ffffff;
+}
+
+.navigators h2 {
+  background: #FEC293;
+  padding: 2px 5px;
+}
+
+.navigators ul {
+  list-style: none;
+  margin: 0;
+  margin-bottom: 5px;
+  margin-top: 5px;
+  padding-left: 10px;
+}
+
+.navigators ul li {
+  color: #999;
+  padding: 2px;
+}
+
+
+
+.facet-field {
+  font-weight: bold;
+}
+
+.highlight {
+  color: white;
+  background-color: gray;
+  border: 1px black solid;
+}
+
+.highlight-box {
+  margin-left: 15px;
+}
+
+.field-name {
+  font-weight: bold;
+}
+
+.highlighted-facet-field {
+  background: white;
+}
+
+.constraints {
+  margin-top: 10px;
+}
+
+#query-form{
+  width: 80%;
+}
+
+
+
+.query-box, .constraints {
+  padding: 5px;
+  margin: 5px;
+  font-weight: normal;
+  font-size: 24px;
+  letter-spacing: 0.08em;
+}
+
+.query-box #q {
+  margin-left: 8px;
+  width: 60%;
+  height: 50px;
+  border: 1px solid #999;
+  font-size: 1em;
+  padding: 0.4em;
+}
+
+.query-box {
+  
+}
+
+.query-boost {
+  
+  top: 10px;
+  left: 50px;
+  position: relative;
+  font-size: 0.8em;
+}
+
+.query-box .inputs{
+  left: 180px;
+  position: relative;
+  
+}
+
+#logo {
+  margin: 10px;
+  border-style: none;
+}
+
+.pagination {
+  padding-left: 33%;
+  background: #eee;
+  margin: 5px;
+  margin-left: 210px;
+  padding-top: 5px;
+  padding-bottom: 5px;
+}
+
+.result-document {
+  border: 1px solid #999;
+  padding: 5px;
+  margin: 5px;
+  margin-left: 210px;
+  margin-bottom: 15px;
+}
+
+.result-document div{
+  padding: 5px;
+}
+
+.result-title{
+  width:60%;
+}
+
+.result-body{
+  background: #ddd;
+}
+
+.mlt{
+  
+}
+
+.map{
+  float: right;
+  position: relative;
+  top: -25px;  
+}
+
+.result-document:nth-child(2n+1) {
+  background-color: #eee;
+}
+
+
+.selected-facet-field {
+  font-weight: bold;
+}
+
+li.show {
+  list-style: disc;
+}
+
+.group-value{
+  font-weight: bold;
+}
+
+.error {
+  color: white;
+  background-color: red;
+  left: 210px;
+  width:80%;
+  position: relative;
+
+}
diff --git a/zookeeper/solr/collection1/conf/velocity/mime_type_lists.vm b/zookeeper/solr/collection1/conf/velocity/mime_type_lists.vm

new file mode 100644 (file)

index 0000000..1468bbd
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/mime_type_lists.vm
@@ -0,0 +1,68 @@
+#**
+ *  Define some Mime-Types, short and long form
+ *#
+
+## MimeType to extension map for detecting file type
+## and showing proper icon
+## List of types match the icons in /solr/img/filetypes
+
+## Short MimeType Names
+## Was called $supportedtypes
+#set($supportedMimeTypes = "7z;ai;aiff;asc;audio;bin;bz2;c;cfc;cfm;chm;class;conf;cpp;cs;css;csv;deb;divx;doc;dot;eml;enc;file;gif;gz;hlp;htm;html;image;iso;jar;java;jpeg;jpg;js;lua;m;mm;mov;mp3;mpg;odc;odf;odg;odi;odp;ods;odt;ogg;pdf;pgp;php;pl;png;ppt;ps;py;ram;rar;rb;rm;rpm;rtf;sig;sql;swf;sxc;sxd;sxi;sxw;tar;tex;tgz;txt;vcf;video;vsd;wav;wma;wmv;xls;xml;xpi;xvid;zip")
+
+## Long Form: map MimeType headers to our Short names
+## Was called $extMap
+#set( $mimeExtensionsMap = {
+   "application/x-7z-compressed": "7z",
+   "application/postscript": "ai",
+   "application/pgp-signature": "asc",
+   "application/octet-stream": "bin",
+   "application/x-bzip2": "bz2",
+   "text/x-c": "c",
+   "application/vnd.ms-htmlhelp": "chm",
+   "application/java-vm": "class",
+   "text/css": "css",
+   "text/csv": "csv",
+   "application/x-debian-package": "deb",
+   "application/msword": "doc",
+   "message/rfc822": "eml",
+   "image/gif": "gif",
+   "application/winhlp": "hlp",
+   "text/html": "html",
+   "application/java-archive": "jar",
+   "text/x-java-source": "java",
+   "image/jpeg": "jpeg",
+   "application/javascript": "js",
+   "application/vnd.oasis.opendocument.chart": "odc",
+   "application/vnd.oasis.opendocument.formula": "odf",
+   "application/vnd.oasis.opendocument.graphics": "odg",
+   "application/vnd.oasis.opendocument.image": "odi",
+   "application/vnd.oasis.opendocument.presentation": "odp",
+   "application/vnd.oasis.opendocument.spreadsheet": "ods",
+   "application/vnd.oasis.opendocument.text": "odt",
+   "application/pdf": "pdf",
+   "application/pgp-encrypted": "pgp",
+   "image/png": "png",
+   "application/vnd.ms-powerpoint": "ppt",
+   "audio/x-pn-realaudio": "ram",
+   "application/x-rar-compressed": "rar",
+   "application/vnd.rn-realmedia": "rm",
+   "application/rtf": "rtf",
+   "application/x-shockwave-flash": "swf",
+   "application/vnd.sun.xml.calc": "sxc",
+   "application/vnd.sun.xml.draw": "sxd",
+   "application/vnd.sun.xml.impress": "sxi",
+   "application/vnd.sun.xml.writer": "sxw",
+   "application/x-tar": "tar",
+   "application/x-tex": "tex",
+   "text/plain": "txt",
+   "text/x-vcard": "vcf",
+   "application/vnd.visio": "vsd",
+   "audio/x-wav": "wav",
+   "audio/x-ms-wma": "wma",
+   "video/x-ms-wmv": "wmv",
+   "application/vnd.ms-excel": "xls",
+   "application/xml": "xml",
+   "application/x-xpinstall": "xpi",
+   "application/zip": "zip"
+})
diff --git a/zookeeper/solr/collection1/conf/velocity/pagination_bottom.vm b/zookeeper/solr/collection1/conf/velocity/pagination_bottom.vm

new file mode 100644 (file)

index 0000000..71b8bdf
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/pagination_bottom.vm
@@ -0,0 +1,22 @@
+#**
+ *  Paging and Statistics at bottom of results
+ *#
+
+## Usually rendered in pagination div tag
+
+#if($response.response.get('grouped'))
+  ## pass
+#else
+
+  #link_to_previous_page("previous")
+
+  <span class="results-found">$page.results_found</span>
+  results found.
+
+  Page <span class="page-num">$page.current_page_number</span>
+    of <span class="page-count">$page.page_count</span>
+
+  #link_to_next_page("next")
+
+#end
+<br/>
diff --git a/zookeeper/solr/collection1/conf/velocity/pagination_top.vm b/zookeeper/solr/collection1/conf/velocity/pagination_top.vm

new file mode 100644 (file)

index 0000000..e0ac8ac
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/pagination_top.vm
@@ -0,0 +1,29 @@
+#**
+ *  Paging and Statistics at top of results
+ *#
+
+## Usually rendered in pagination div tag
+
+## Grouped Results / Not Paginated
+#if($response.response.get('grouped'))
+
+  <span>
+    <span class="results-found">
+      $response.response.get('grouped').size() group(s)
+    </span>
+    found in ${response.responseHeader.QTime} ms
+  </span>
+
+## Regular Results / Use Paging Links if needed
+#else
+
+  <span>
+    <span class="results-found">$page.results_found</span>
+    results found in
+    ${response.responseHeader.QTime} ms
+  </span>
+
+  Page <span class="page-num">$page.current_page_number</span>
+    of <span class="page-count">$page.page_count</span>
+
+#end   ## end else non-grouped results, normal pagination
diff --git a/zookeeper/solr/collection1/conf/velocity/product_doc.vm b/zookeeper/solr/collection1/conf/velocity/product_doc.vm

new file mode 100644 (file)

index 0000000..c878d8c
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/product_doc.vm
@@ -0,0 +1,32 @@
+#**
+ *  Render a hit representing a Product
+ *  assumed to have a field called "name"
+ *#
+## The store location and the More-Like-This id/name are escaped with the esc tool before output
+<div class="result-title"><b>#field('name')</b><span class="mlt">   #if($params.getBool('mlt', false) == false)<a href="#lensNoQ&q=id:$docId&mlt=true">More Like This</a>#end</span></div>
+##do we have a physical store for this product
+#set($store = $doc.getFieldValue('store'))
+#if($store)<div class="map"><img src="http://maps.google.com/maps/api/staticmap?&zoom=12&size=150x80&maptype=roadmap&markers=$esc.url($store)&sensor=false" /><div><small><a target="_map" href="http://maps.google.com/?q=$esc.url($store)&amp;source=embed">Larger Map</a></small></div></div>#end
+<div>Id: #field('id')</div>
+<div>Price: #field('price_c')</div>
+<div>Features: #field('features')</div>
+<div>In Stock: #field('inStock')</div>
+<div class="mlt">
+  #set($mlt = $mltResults.get($docId))
+  #set($mltOn = $params.getBool('mlt'))
+  #if($mltOn == true)<div class="field-name">Similar Items</div>#end
+  #if ($mltOn && $mlt && $mlt.size() > 0)
+  <ul>
+    #foreach($mltHit in $mlt)
+      #set($mltId = $mltHit.getFieldValue('id'))
+      <li><div><a href="#url_for_home?q=id:$esc.url($mltId)">$esc.html($mltId)</a></div><div><span class="field-name">Name:</span> $!esc.html($mltHit.getFieldValue('name'))</div>
+        <div><span class="field-name">Price:</span> $!number.currency($mltHit.getFieldValue('price')) <span class="field-name">In Stock:</span> $mltHit.getFieldValue('inStock')</div>
+
+      </li>
+    #end
+  </ul>
+  #elseif($mltOn && $mlt.size() == 0)
+    <div>No Similar Items Found</div>
+  #end
+</div>
+#parse('debug.vm')
diff --git a/zookeeper/solr/collection1/conf/velocity/query.vm b/zookeeper/solr/collection1/conf/velocity/query.vm

new file mode 100644 (file)

index 0000000..ddbab3f
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/query.vm
@@ -0,0 +1,42 @@
+<div class="query-box">
+  <form id="query-form" action="#{url_for_home}" method="GET">
+    <div class="inputs">
+      <span #annTitle("Add the query using the &q= parameter")>Find: <input type="text" id="q" name="q" value="$!esc.html($params.get('q'))"/> <input type="submit" id="querySubmit"/> <input type="reset"/></span>
+      <div class="query-boost"><span #annTitle("Add the boost function &bf=price to the query")><input type="checkbox" name="bf" value="price" #if($request.params.get('bf') == 'price')checked="true"#end>Boost by Price</input></span>
+      #parse("querySpatial.vm")
+      #parse("queryGroup.vm")
+      </div>
+  </div>
+    ## Hidden inputs re-submit debugQuery, annotateBrowse and the active fq filters; request-derived text is HTML-escaped
+    #if($request.params.get('debugQuery'))
+      <input type="hidden" name="debugQuery" value="true"/>
+    #end
+    #if($annotate == true)
+      <input type="hidden" name="annotateBrowse" value="true"/>
+    #end
+    #foreach($fq in $request.params.getParams('fq'))
+      #if ($fq != "{!bbox}")
+        <input type="hidden" name="fq" id="allFQs" value="$esc.html($fq)"/>
+      #end
+    #end
+    <div class="constraints" #annTitle("Lists out the &fq filters.  Click to remove.")>
+      #foreach($fq in $params.getParams('fq'))
+        #set($previous_fq_count=$velocityCount - 1)
+        #if($fq != '')
+        &gt; <a style="{text-decoration: line-through;}" href="#url_for_filters($request.params.getParams('fq').subList(0,$previous_fq_count))">$esc.html($fq)</a>
+        #end
+      #end
+    </div>
+    <div class="parsed_query_header">
+     #if($request.params.get('debugQuery'))
+        <a href="#" onclick='jQuery(this).siblings("div").toggle(); return false;'>toggle parsed query</a>
+        <div class="parsed_query" style="display:none">$esc.html($response.response.debug.parsedquery)</div>
+      #end
+      #set($queryOpts = $request.params.get("queryOpts"))
+      #if($queryOpts && $queryOpts != "")
+        <input type="hidden" name="queryOpts" value="$esc.html($queryOpts)"/>
+      #end
+    </div>
+  </form>
+
+</div>
diff --git a/zookeeper/solr/collection1/conf/velocity/query_form.vm b/zookeeper/solr/collection1/conf/velocity/query_form.vm

new file mode 100644 (file)

index 0000000..70a0af2
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/query_form.vm
@@ -0,0 +1,64 @@
+#**
+ *  Renders the main query form: search box, price boost, spatial/group options, active filters
+ *#
+## Request-derived values written into markup go through $esc.html so they cannot break out of it
+<div class="query-box">
+  <form id="query-form" action="#{url_for_home}" method="GET">
+
+    <div class="inputs">
+      <span #annTitle("Add the query using the &q= parameter")>
+        Find:
+        <input type="text" id="q" name="q" value="$!esc.html($params.get('q'))"/>
+        <input type="submit" id="querySubmit"/>
+        <input type="reset"/>
+      </span>
+      <div class="query-boost">
+        <span #annTitle("Add the boost function &bf=price to the query")>
+          <input type="checkbox" name="bf" value="price"
+            #if($request.params.get('bf') == 'price')checked="true"#end
+          />
+          ## <input> is a void element: the caption follows it rather than sitting inside it
+          Boost by Price
+        </span>
+      #parse("query_spatial.vm")
+      #parse("query_group.vm")
+      </div>
+    </div>
+    ## Hidden inputs below carry the debug/annotate flags and the current filters across submits
+    #if($request.params.get('debugQuery'))
+      <input type="hidden" name="debugQuery" value="true"/>
+    #end
+    #if($annotate == true)
+      <input type="hidden" name="annotateBrowse" value="true"/>
+    #end
+    #foreach($fq in $request.params.getParams('fq'))
+      #if ($fq != "{!bbox}")
+        <input type="hidden" name="fq" id="allFQs" value="$esc.html($fq)"/>
+      #end
+    #end
+    ## Active filters: each link keeps only the filters listed before it (assumes the loop counter starts at 1)
+    <div class="constraints" #annTitle("Lists out the &fq filters.  Click to remove.")>
+      #foreach($fq in $params.getParams('fq'))
+        #set($previous_fq_count=$velocityCount - 1)
+        #if($fq != '')
+          &gt;
+          <a style="{text-decoration: line-through;}"
+            href="#url_for_filters($request.params.getParams('fq').subList(0,$previous_fq_count))"
+          >$esc.html($fq)</a>
+        #end
+      #end
+    </div>
+
+    <div class="parsed_query_header">
+      #if($request.params.get('debugQuery'))
+        <a href="#" onclick='jQuery(this).siblings("div").toggle(); return false;'>toggle parsed query</a>
+        <div class="parsed_query" style="display:none">$!esc.html($response.response.debug.parsedquery)</div>
+      #end
+      #set($queryOpts = $request.params.get("queryOpts"))
+      #if($queryOpts && $queryOpts != "")
+        <input type="hidden" name="queryOpts" value="$esc.html($queryOpts)"/>
+      #end
+    </div>
+
+  </form>
+</div>
diff --git a/zookeeper/solr/collection1/conf/velocity/query_group.vm b/zookeeper/solr/collection1/conf/velocity/query_group.vm

new file mode 100644 (file)

index 0000000..42e5457
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/query_group.vm
@@ -0,0 +1,43 @@
+#**
+ *  Query settings for grouping by fields,
+ *  e.g.: Manufacturer or Popularity
+ *#
+
+#set($queryOpts = $params.get("queryOpts"))
+
+#if($queryOpts == "group")
+  <div>
+    #set($groupFs = $request.params.getParams('group.field'))
+
+    <label #annTitle("Add the &group.field parameter. Multiselect is supported")>
+      Group By:
+      <select id="group" name="group.field" multiple="true">
+        ## Every submitted group.field value is checked, so a multi-selection stays selected
+        ## TODO: fix empty / "No Group" selection
+
+        <option value=""
+          #if($groupFs.contains(''))selected="true"#end
+        >
+          No Group
+        </option>
+
+        <option value="manu_exact"
+          #if($groupFs.contains('manu_exact'))selected="true"#end
+        >
+          Manufacturer
+        </option>
+
+        <option value="popularity"
+          #if($groupFs.contains('popularity'))selected="true"#end
+        >
+          Popularity
+        </option>
+
+      </select>
+    </label>
+
+    <input type="hidden" name="group" value="true"/>
+
+  </div>
+
+#end
diff --git a/zookeeper/solr/collection1/conf/velocity/query_spatial.vm b/zookeeper/solr/collection1/conf/velocity/query_spatial.vm

new file mode 100644 (file)

index 0000000..2bc2044
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/query_spatial.vm
@@ -0,0 +1,75 @@
+#**
+ *  Query logic for selecting location / Geospatial search
+ *#
+
+#set($queryOpts = $params.get("queryOpts"))
+
+#if($queryOpts == "spatial")
+
+  <div>
+
+    #set($loc = $request.params.get('pt'))
+    ## Normalize first trip through to "none" because
+    ## an empty string generates an error message later on
+    #if( ! $loc )
+      #set( $loc = "none" )
+    #end
+
+    #set($dist = $request.params.get('d', "10"))
+
+    ## Cities for The Select List; each key is the value submitted as the pt parameter
+    #set( $cities = {
+      "none": "No Filter",
+      "45.17614,-93.87341": "Buffalo, MN",
+      "37.7752,-100.0232": "Dodge City, KS",
+      "35.0752,-97.032": "Oklahoma City, OK",
+      "37.7752,-122.4232": "San Francisco CA"
+    })
+
+    <label #annTitle("Add the &pt parameter")>
+      Location Filter:
+      <select id="pt" name="pt">
+
+        ## Generate <option> tag for each city
+        #foreach( $city_lon_lat in $cities.keySet() )
+          #set( $city_name = $cities.get($city_lon_lat) )
+          <option value="$city_lon_lat"
+            #if($loc == $city_lon_lat)selected="true"#end
+          >
+            $city_name
+          </option>
+        #end
+
+      </select>
+    </label>
+
+    <span #annTitle("Add the &d parameter")>
+      Distance (KM):
+      <input id="d" name="d" type="text" size="6"
+        value="#if($dist != '')$esc.html($dist)#{else}10#end"  ## TODO: isn't the default of 10 above sufficient?  no if/else needed?
+      />
+    </span>
+
+    <input type="hidden" name="sfield" value="store"/>
+    <input type="hidden" id="spatialFQ" name="fq" value=""/>
+    <input type="hidden" name="queryOpts" value="spatial"/>
+
+  </div>
+
+  <script type="text/javascript">
+    $('#query-form').submit(function() {
+      if ($("#pt").val() != "none") {
+        $("#spatialFQ").val("{!bbox}");
+      }
+      ## Script locals deliberately lack a dollar prefix so Velocity cannot read them as references.
+      ## The carried-over filter input is absent when no fq is active, so it is only touched if found.
+      var carried = $("#allFQs");
+      if (carried.length > 0) {
+        var fqs = carried.val().replace("{!bbox}", "");
+        if (fqs == '') { carried.remove(); } else { carried.val(fqs); }
+      }
+      return true;
+    });
+  </script>
+
+#end
diff --git a/zookeeper/solr/collection1/conf/velocity/results_list.vm b/zookeeper/solr/collection1/conf/velocity/results_list.vm

new file mode 100644 (file)

index 0000000..f73532b
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/results_list.vm
@@ -0,0 +1,22 @@
+#**
+ *  Render the main Results List: grouped responses via hit_grouped.vm, otherwise one hit.vm per document
+ *#
+
+## Usually displayed inside <div class="results">
+
+#if($response.response.get('grouped'))
+
+  #foreach($grouping in $response.response.get('grouped'))
+    #parse("hit_grouped.vm")
+  #end
+
+#else
+
+  #foreach($doc in $response.results)
+    #parse("hit.vm")
+    ## An extremely simple view of the doc, which might be nicer for debugging,
+    ## is available by enabling the disabled parse line below instead
+    ##parse("hit_plain.vm")
+  #end
+
+#end
diff --git a/zookeeper/solr/collection1/conf/velocity/richtext_doc.vm b/zookeeper/solr/collection1/conf/velocity/richtext_doc.vm

new file mode 100644 (file)

index 0000000..9e8d6cb
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/richtext_doc.vm
@@ -0,0 +1,153 @@
+#**
+ *  Render a complex document in the results list
+ *#
+
+## Load Mime-Type List and Mapping
+#parse('mime_type_lists.vm')
+## Sets:
+## * supportedMimeTypes, AKA supportedtypes
+## * mimeExtensionsMap, AKA extMap
+
+## Title (HTML-escaped in both branches, since $title is later written into the page as-is)
+#if($doc.getFieldValue('title'))
+  #set($title = $esc.html($doc.getFirstValue('title')))
+#else
+  #set($title = "[" + $esc.html($doc.getFieldValue('id')) + "]")
+#end
+
+## URL: explicit url field, else a file:/// link from resourcename, else the bare id
+#if($doc.getFieldValue('url'))
+  #set($url = $doc.getFieldValue('url'))
+#elseif($doc.getFieldValue('resourcename'))
+  #set($url = "file:///$doc.getFieldValue('resourcename')")
+#else
+  #set($url = "$doc.getFieldValue('id')")
+#end
+
+## Sort out Mime-Type
+#set($ct = $list.get($doc.getFirstValue('content_type').split(";"),0))
+#set($filename = $doc.getFieldValue('resourcename'))
+#set($filetype = false)
+#set($filetype = $mimeExtensionsMap.get($ct))
+
+## TODO: falling back to file extension is convenient,
+## except when you don't have an icon for that extension
+## example "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+## document with a .docx extension.
+## It'd be nice to fall back to an "unknown" or the existing "file" type
+## for such extensions too (see the still-disabled check on the last line).
+## Guarded: without a resourcename, or with a name lacking a dot, substring must not be called with -1.
+#if(!$filetype && $filename)
+  #set($dotAt = $filename.lastIndexOf("."))
+  #if($dotAt >= 0)
+    #set($filetype = $filename.substring($dotAt).substring(1))
+  #end
+#end
+## No mapping and no usable extension: use the generic "file" icon
+#if(!$filetype || $filetype == "")
+  #set($filetype = "file")
+#end
+## Still disabled: #if(!$supportedMimeTypes.contains($filetype)) #set($filetype = "file") #end
+
+## Row 1: Icon and Title and mlt link
+<div class="result-title">
+  ## Icon
+  ## Small file type icons from http://www.splitbrain.org/projects/file_icons (public domain)
+  <img src="#{url_root}/img/filetypes/${filetype}.png" align="center">
+
+  ## Title, hyperlinked ($url is built from document fields, so it is escaped for the attribute)
+  <a href="$esc.html($url)" target="_blank">
+    <b>$title</b></a>
+
+  ## Link for MLT / More Like This / Find Similar
+  <span class="mlt">
+    #if($params.getBool('mlt', false) == false)
+      <a href="#lensNoQ&q=id:%22$docId%22&mlt=true">
+        More Like This</a>
+    #end
+  </span>
+
+</div>
+
+## Row 2?: ID / URL
+<div>
+  Id: #field('id')
+</div>
+
+## Resource Name (or URL) and content type; values come from the document, hence the escaping
+<div>
+  #if($doc.getFieldValue('resourcename'))
+    Resource name: $esc.html($filename)
+  #elseif($url)
+    URL: $esc.html($url)
+  #end
+  #if($ct)
+    ($esc.html($ct))
+  #end
+</div>
+
+## Author
+#if($doc.getFieldValue('author'))
+  <div>
+    Author: #field('author')
+  </div>
+#end
+
+## Last_Modified Date
+#if($doc.getFieldValue('last_modified'))
+  <div>
+    last-modified:
+    #field('last_modified')
+  </div>
+#end
+
+## Main content of doc
+<div class="result-body">
+  #field('content')
+</div>
+
+## Display Similar Documents / MLT = More Like This
+<div class="mlt">
+  #set($mlt = false)  ## reset first: a #set from a null lookup may otherwise keep the previous hit's list
+  #set($mlt = $mltResults.get($docId))
+  #set($mltOn = $params.getBool('mlt'))
+  #if($mltOn == true)
+    <div class="field-name">
+      Similar Items
+    </div>
+  #end
+  #if ($mltOn && $mlt && $mlt.size() > 0)  ## MLT enabled and there are entries to show
+    <ul>
+      #foreach($mltHit in $mlt)
+        #set($mltId = $mltHit.getFieldValue('id'))
+        <li>
+          <div>
+            <a href="#url_for_home?q=id:$esc.html($mltId)">
+              $esc.html($mltId)</a>
+          </div>
+          <div>
+            <span class="field-name">
+              Title:
+            </span>
+            $!esc.html($mltHit.getFieldValue('title'))
+          </div>
+          <div>
+            <span class="field-name">
+              Author:
+            </span>
+            $!esc.html($mltHit.getFieldValue('author'))
+            <span class="field-name">
+              Description:
+            </span>
+            $!esc.html($mltHit.getFieldValue('description'))
+          </div>
+        </li>
+      #end    ## end for each mltHit in $mlt
+    </ul>
+  ## Else MLT enabled but nothing to list for this document (lookup missing or empty)
+  #elseif($mltOn)
+    <div>No Similar Items Found</div>
+  #end
+</div>  ## div class=mlt
+
+#parse('debug.vm')
diff --git a/zookeeper/solr/collection1/conf/velocity/suggest.vm b/zookeeper/solr/collection1/conf/velocity/suggest.vm

new file mode 100644 (file)

index 0000000..dae6b83
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/suggest.vm
@@ -0,0 +1,8 @@
+#**
+ *  Provides dynamic spelling suggestions
+ *  as you type in the search form (prints the key of each entry under terms.name in the response)
+ *#
+
+#foreach($t in $response.response.terms.name)
+  $t.key
+#end
diff --git a/zookeeper/solr/collection1/conf/velocity/tabs.vm b/zookeeper/solr/collection1/conf/velocity/tabs.vm

new file mode 100644 (file)

index 0000000..da19cbc
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/velocity/tabs.vm
@@ -0,0 +1,50 @@
+#**
+ *  Provides navigation/access to Advanced search options (Simple, Spatial, Group By)
+ *  Usually displayed near the top of the page; the tab flagged as selected is shown as plain text
+ *#
+
+##TODO: Make some nice tabs here
+
+#set($queryOpts = $params.get("queryOpts"))
+
+<div class="tabs-bar" #annTitle("Click the link to demonstrate various Solr capabilities")>
+
+  <span>Type of Search:</span>
+
+  ##queryOpts=$queryOpts
+
+  ## Simple Search: selected when queryOpts is not set, otherwise a link back to the plain form
+  ##set( $selected = ($queryOpts && $queryOpts != "") )
+  #set( $selected = ! $queryOpts )
+  <span class="tab #if($selected)selected#end">
+    #if($selected)
+      Simple
+    #else
+      <a href="#url_for_home/?#debug#annotate">
+        Simple</a>
+    #end
+  </span>
+
+  ## GEO-Spatial / Location Based: selected when queryOpts is "spatial"
+  #set( $selected = ($queryOpts == "spatial") )
+  <span class="tab #if($selected)selected#end">
+    #if($selected)
+      Spatial
+    #else
+      <a href="#url_for_home?&queryOpts=spatial#debug#annotate">
+        Spatial</a>
+    #end
+  </span>
+
+  ## Group By Field: selected when queryOpts is "group"; the link preselects group.field=manu_exact
+  #set( $selected = ($queryOpts == "group") )
+  <span class="tab #if($selected)selected#end">
+    #if($selected)
+      Group By
+    #else
+      <a href="#url_for_home?#debug#annotate&queryOpts=group&group=true&group.field=manu_exact">
+        Group By</a>
+    #end
+  </span>
+
+</div>
diff --git a/zookeeper/solr/collection1/conf/xslt/example.xsl b/zookeeper/solr/collection1/conf/xslt/example.xsl

new file mode 100644 (file)

index 0000000..b899270
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/xslt/example.xsl
@@ -0,0 +1,132 @@
+<?xml version='1.0' encoding='UTF-8'?>
+
+<!-- 
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ -->
+
+<!-- 
+  Simple transform of Solr query results to HTML
+ -->
+<xsl:stylesheet version='1.0'
+    xmlns:xsl='http://www.w3.org/1999/XSL/Transform'
+>
+
+  <xsl:output media-type="text/html" encoding="UTF-8"/> 
+  
+  <xsl:variable name="title" select="concat('Solr search results (',response/result/@numFound,' documents)')"/>
+  
+  <xsl:template match='/'>
+    <html>
+      <head>
+        <title><xsl:value-of select="$title"/></title>
+        <xsl:call-template name="css"/>
+      </head>
+      <body>
+        <h1><xsl:value-of select="$title"/></h1>
+        <div class="note">
+          This has been formatted by the sample "example.xsl" transform -
+          use your own XSLT to get a nicer page
+        </div>
+        <xsl:apply-templates select="response/result/doc"/>
+      </body>
+    </html>
+  </xsl:template>
+  
+  <xsl:template match="doc">
+    <!-- One table per result document; child templates receive the document's position as "pos". -->
+    <div class="doc">
+      <table width="100%">
+        <xsl:apply-templates>
+          <xsl:with-param name="pos"><xsl:value-of select="position()"/></xsl:with-param>
+        </xsl:apply-templates>
+      </table>
+    </div>
+  </xsl:template>
+
+  <xsl:template match="doc/*[@name='score']" priority="100">
+    <xsl:param name="pos"/>
+    <!--
+      Score row. When the response contains an "explain" list, a "?" link toggles a
+      hidden block holding the explain entry at this document's position.
+      The link and the block are literal result elements using attribute value templates;
+      the anchor is kept on one line so that no whitespace surrounds the "?".
+    -->
+    <tr>
+      <td class="name">
+        <xsl:value-of select="@name"/>
+      </td>
+      <td class="value">
+        <xsl:value-of select="."/>
+
+        <xsl:if test="//lst[@name='explain']">
+          <a href="javascript:toggle(&quot;exp-{$pos}&quot;);">?</a>
+          <br/>
+          <div class="exp" id="exp-{$pos}">
+            <xsl:value-of select="//lst[@name='explain']/str[position()=$pos]"/>
+          </div>
+        </xsl:if>
+      </td>
+    </tr>
+  </xsl:template>
+
+  <xsl:template match="doc/arr" priority="100">
+    <tr>
+      <td class="name">
+        <xsl:value-of select="@name"/>
+      </td>
+      <td class="value">
+        <ul>
+        <xsl:for-each select="*">
+          <li><xsl:value-of select="."/></li>
+        </xsl:for-each>
+        </ul>
+      </td>
+    </tr>
+  </xsl:template>
+
+
+  <xsl:template match="doc/*">
+    <tr>
+      <td class="name">
+        <xsl:value-of select="@name"/>
+      </td>
+      <td class="value">
+        <xsl:value-of select="."/>
+      </td>
+    </tr>
+  </xsl:template>
+
+  <xsl:template match="*"/>
+  
+  <xsl:template name="css">
+    <script>
+      function toggle(id) {
+        var obj = document.getElementById(id);
+        obj.style.display = (obj.style.display != 'block') ? 'block' : 'none';
+      }
+    </script>
+    <style type="text/css">
+      body { font-family: "Lucida Grande", sans-serif }
+      td.name { font-style: italic; font-size:80%; }
+      td { vertical-align: top; }
+      ul { margin: 0px; margin-left: 1em; padding: 0px; }
+      .note { font-size:80%; }
+      .doc { margin-top: 1em; border-top: solid grey 1px; }
+      .exp { display: none; font-family: monospace; white-space: pre; }
+    </style>
+  </xsl:template>
+
+</xsl:stylesheet>
diff --git a/zookeeper/solr/collection1/conf/xslt/example_atom.xsl b/zookeeper/solr/collection1/conf/xslt/example_atom.xsl

new file mode 100644 (file)

index 0000000..b6c2315
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/xslt/example_atom.xsl
@@ -0,0 +1,67 @@
+<?xml version='1.0' encoding='UTF-8'?>
+
+<!-- 
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ -->
+
+<!-- 
+  Simple transform of Solr query results to Atom
+ -->
+
+<xsl:stylesheet version='1.0'
+    xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>
+
+  <xsl:output
+       method="xml"
+       encoding="utf-8"
+       media-type="application/xml"
+  />
+
+  <xsl:template match='/'>
+    <xsl:variable name="query" select="response/lst[@name='responseHeader']/lst[@name='params']/str[@name='q']"/>
+    <feed xmlns="http://www.w3.org/2005/Atom">
+      <title>Example Solr Atom 1.0 Feed</title>
+      <subtitle>
+       This has been formatted by the sample "example_atom.xsl" transform -
+       use your own XSLT to get a nicer Atom feed.
+      </subtitle>
+      <author>
+        <name>Apache Solr</name>
+        <email>solr-user@lucene.apache.org</email>
+      </author>
+      <!-- self link: goes through the select handler and names this stylesheet file -->
+      <link rel="self" type="application/atom+xml" href="http://localhost:8983/solr/select?q={$query}&amp;wt=xslt&amp;tr=example_atom.xsl"/>
+      <updated>
+        <xsl:value-of select="response/result/doc[position()=1]/date[@name='timestamp']"/>
+      </updated>
+      <id>tag:localhost,2007:example</id>
+      <xsl:apply-templates select="response/result/doc"/>
+    </feed>
+  </xsl:template>
+    
+  <!-- search results xslt: one Atom entry per document; the entry declares the Atom namespace itself because it is not nested inside the feed element in this stylesheet -->
+  <xsl:template match="doc">
+    <xsl:variable name="id" select="str[@name='id']"/>
+    <entry xmlns="http://www.w3.org/2005/Atom">
+      <title><xsl:value-of select="str[@name='name']"/></title>
+      <link href="http://localhost:8983/solr/select?q=id:{$id}"/>
+      <id>tag:localhost,2007:<xsl:value-of select="$id"/></id>
+      <summary><xsl:value-of select="arr[@name='features']"/></summary>
+      <updated><xsl:value-of select="date[@name='timestamp']"/></updated>
+    </entry>
+  </xsl:template>
+
+</xsl:stylesheet>
diff --git a/zookeeper/solr/collection1/conf/xslt/example_rss.xsl b/zookeeper/solr/collection1/conf/xslt/example_rss.xsl

new file mode 100644 (file)

index 0000000..2857f11
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/xslt/example_rss.xsl
@@ -0,0 +1,66 @@
+<?xml version='1.0' encoding='UTF-8'?>
+
+<!-- 
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ -->
+
+<!-- 
+  Simple transform of Solr query results to RSS
+ -->
+
+<xsl:stylesheet version='1.0'
+    xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>
+
+  <xsl:output
+       method="xml"
+       encoding="utf-8"
+       media-type="application/xml"
+  />
+  <xsl:template match='/'>
+    <rss version="2.0">
+       <channel>
+        <title>Example Solr RSS 2.0 Feed</title>
+         <link>http://localhost:8983/solr</link>
+         <description>
+          This has been formatted by the sample "example_rss.xsl" transform -
+          use your own XSLT to get a nicer RSS feed.
+         </description>
+         <language>en-us</language>
+         <docs>http://localhost:8983/solr</docs>
+         <xsl:apply-templates select="response/result/doc"/>
+       </channel>
+    </rss>
+  </xsl:template>
+  
+  <!-- search results xslt -->
+  <xsl:template match="doc">
+    <xsl:variable name="id" select="str[@name='id']"/>
+    <xsl:variable name="timestamp" select="date[@name='timestamp']"/>
+    <!-- The item URL is built once and written with value-of. Typing it as literal text on its
+         own indented line made the link and guid values start with a newline and spaces,
+         because text that is not whitespace-only is copied to the output verbatim. -->
+    <xsl:variable name="itemUrl" select="concat('http://localhost:8983/solr/select?q=id:', $id)"/>
+    <item>
+      <title><xsl:value-of select="str[@name='name']"/></title>
+      <link><xsl:value-of select="$itemUrl"/></link>
+      <description>
+        <xsl:value-of select="arr[@name='features']"/>
+      </description>
+      <pubDate><xsl:value-of select="$timestamp"/></pubDate>
+      <guid><xsl:value-of select="$itemUrl"/></guid>
+    </item>
+  </xsl:template>
+</xsl:stylesheet>
diff --git a/zookeeper/solr/collection1/conf/xslt/luke.xsl b/zookeeper/solr/collection1/conf/xslt/luke.xsl

new file mode 100644 (file)

index 0000000..8553f3c
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/xslt/luke.xsl
@@ -0,0 +1,337 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+    
+    http://www.apache.org/licenses/LICENSE-2.0
+    
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+
+
+<!-- 
+  Display the luke request handler with graphs
+ -->
+<xsl:stylesheet
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns="http://www.w3.org/1999/xhtml"
+    version="1.0"
+    >
+    <xsl:output
+        method="html"
+        encoding="UTF-8"
+        media-type="text/html"
+        doctype-public="-//W3C//DTD XHTML 1.0 Strict//EN"
+        doctype-system="http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
+    />
+
+    <xsl:variable name="title">Solr Luke Request Handler Response</xsl:variable>
+
+    <xsl:template match="/">
+        <html xmlns="http://www.w3.org/1999/xhtml">
+            <head>
+                <link rel="stylesheet" type="text/css" href="solr-admin.css"/>
+                <link rel="icon" href="favicon.ico" type="image/ico"/>
+                <link rel="shortcut icon" href="favicon.ico" type="image/ico"/>
+                <title>
+                    <xsl:value-of select="$title"/>
+                </title>
+                <xsl:call-template name="css"/>
+
+            </head>
+            <body>
+                <h1>
+                    <xsl:value-of select="$title"/>
+                </h1>
+                <div class="doc">
+                    <ul>
+                        <xsl:if test="response/lst[@name='index']">
+                            <li>
+                                <a href="#index">Index Statistics</a>
+                            </li>
+                        </xsl:if>
+                        <xsl:if test="response/lst[@name='fields']">
+                            <li>
+                                <a href="#fields">Field Statistics</a>
+                                <ul>
+                                    <xsl:for-each select="response/lst[@name='fields']/lst">
+                                        <li>
+                                            <a href="#{@name}">
+                                                <xsl:value-of select="@name"/>
+                                            </a>
+                                        </li>
+                                    </xsl:for-each>
+                                </ul>
+                            </li>
+                        </xsl:if>
+                        <xsl:if test="response/lst[@name='doc']">
+                            <li>
+                                <a href="#doc">Document statistics</a>
+                            </li>
+                        </xsl:if>
+                    </ul>
+                </div>
+                <xsl:if test="response/lst[@name='index']">
+                    <h2><a name="index"/>Index Statistics</h2>
+                    <xsl:apply-templates select="response/lst[@name='index']"/>
+                </xsl:if>
+                <xsl:if test="response/lst[@name='fields']">
+                    <h2><a name="fields"/>Field Statistics</h2>
+                    <xsl:apply-templates select="response/lst[@name='fields']"/>
+                </xsl:if>
+                <xsl:if test="response/lst[@name='doc']">
+                    <h2><a name="doc"/>Document statistics</h2>
+                    <xsl:apply-templates select="response/lst[@name='doc']"/>
+                </xsl:if>
+            </body>
+        </html>
+    </xsl:template>
+
+    <xsl:template match="lst">
+        <!-- A lst nested in another lst is wrapped in a full-width table row;
+             a lst with any other parent is emitted as a bare block. Both delegate to "list". -->
+        <xsl:choose>
+            <xsl:when test="parent::lst">
+                <tr>
+                    <td colspan="2">
+                        <div class="doc"><xsl:call-template name="list"/></div>
+                    </td>
+                </tr>
+            </xsl:when>
+            <xsl:otherwise>
+                <div class="doc"><xsl:call-template name="list"/></div>
+            </xsl:otherwise>
+        </xsl:choose>
+    </xsl:template>
+
+    <xsl:template name="list">
+        <xsl:if test="count(child::*)>0">
+            <table>
+                <thead>
+                    <tr>
+                        <th colspan="2">
+                            <p>
+                                <a name="{@name}"/>
+                            </p>
+                            <xsl:value-of select="@name"/>
+                        </th>
+                    </tr>
+                </thead>
+                <tbody>
+                    <xsl:choose>
+                        <xsl:when
+                            test="@name='histogram'">
+                            <tr>
+                                <td colspan="2">
+                                    <xsl:call-template name="histogram"/>
+                                </td>
+                            </tr>
+                        </xsl:when>
+                        <xsl:otherwise>
+                            <xsl:apply-templates/>
+                        </xsl:otherwise>
+                    </xsl:choose>
+                </tbody>
+            </table>
+        </xsl:if>
+    </xsl:template>
+
+    <xsl:template name="histogram">
+        <div class="doc">
+            <xsl:call-template name="barchart">
+                <xsl:with-param name="max_bar_width">50</xsl:with-param>
+                <xsl:with-param name="iwidth">800</xsl:with-param>
+                <xsl:with-param name="iheight">160</xsl:with-param>
+                <xsl:with-param name="fill">blue</xsl:with-param>
+            </xsl:call-template>
+        </div>
+    </xsl:template>
+
+    <xsl:template name="barchart">
+        <xsl:param name="max_bar_width"/>
+        <xsl:param name="iwidth"/>
+        <xsl:param name="iheight"/>
+        <xsl:param name="fill"/>
+        <xsl:variable name="max">
+            <xsl:for-each select="int">
+                <xsl:sort data-type="number" order="descending"/>
+                <xsl:if test="position()=1">
+                    <xsl:value-of select="."/>
+                </xsl:if>
+            </xsl:for-each>
+        </xsl:variable>
+        <xsl:variable name="bars">
+           <xsl:value-of select="count(int)"/>
+        </xsl:variable>
+        <xsl:variable name="bar_width">
+           <xsl:choose>
+             <xsl:when test="$max_bar_width &lt; ($iwidth div $bars)">
+               <xsl:value-of select="$max_bar_width"/>
+             </xsl:when>
+             <xsl:otherwise>
+               <xsl:value-of select="$iwidth div $bars"/>
+             </xsl:otherwise>
+           </xsl:choose>
+        </xsl:variable>
+        <table class="histogram">
+           <tbody>
+              <tr>
+                <xsl:for-each select="int">
+                   <td>
+                 <xsl:value-of select="."/>
+                 <div class="histogram">
+                  <xsl:attribute name="style">background-color: <xsl:value-of select="$fill"/>; width: <xsl:value-of select="$bar_width"/>px; height: <xsl:value-of select="($iheight*number(.)) div $max"/>px;</xsl:attribute>
+                 </div>
+                   </td> 
+                </xsl:for-each>
+              </tr>
+              <tr>
+                <xsl:for-each select="int">
+                   <td>
+                       <xsl:value-of select="@name"/>
+                   </td>
+                </xsl:for-each>
+              </tr>
+           </tbody>
+        </table>
+    </xsl:template>
+
+    <xsl:template name="keyvalue">
+        <xsl:choose>
+            <xsl:when test="@name">
+                <tr>
+                    <td class="name">
+                        <xsl:value-of select="@name"/>
+                    </td>
+                    <td class="value">
+                        <xsl:value-of select="."/>
+                    </td>
+                </tr>
+            </xsl:when>
+            <xsl:otherwise>
+                <xsl:value-of select="."/>
+            </xsl:otherwise>
+        </xsl:choose>
+    </xsl:template>
+
+    <xsl:template match="int|bool|long|float|double|uuid|date">
+        <xsl:call-template name="keyvalue"/>
+    </xsl:template>
+
+    <xsl:template match="arr"> <!-- Renders an <arr> as one table row: its @name in the name cell and a bulleted list of its items in the value cell. -->
+        <tr>
+            <td class="name">
+                <xsl:value-of select="@name"/>
+            </td>
+            <td class="value">
+                <ul>
+                    <xsl:for-each select="child::*"> <!-- One <li> per child element; the child becomes the context node. -->
+                        <li>
+                            <xsl:apply-templates/> <!-- No select: processes the children of the current item, not the item element itself. -->
+                        </li>
+                    </xsl:for-each>
+                </ul>
+            </td>
+        </tr>
+    </xsl:template>
+
+    <xsl:template match="str"> <!-- Dispatches <str> elements: flag-style fields go to "schema", everything else to "keyvalue". -->
+        <xsl:choose>
+            <xsl:when test="@name='schema' or @name='index' or @name='flags'"> <!-- Only these three names are routed to the "schema" template. -->
+                <xsl:call-template name="schema"/>
+            </xsl:when>
+            <xsl:otherwise>
+                <xsl:call-template name="keyvalue"/>
+            </xsl:otherwise>
+        </xsl:choose>
+    </xsl:template>
+
+    <xsl:template name="schema"> <!-- Named template: renders the context node as a table row whose value cell holds either the raw text or the expanded flag descriptions. -->
+        <tr>
+            <td class="name">
+                <xsl:value-of select="@name"/>
+            </td>
+            <td class="value">
+                <xsl:if test="contains(.,'unstored')"> <!-- Values containing the substring 'unstored' are printed verbatim. -->
+                    <xsl:value-of select="."/>
+                </xsl:if>
+                <xsl:if test="not(contains(.,'unstored'))"> <!-- Complement of the test above: the value is passed as charList to "infochar2string" for per-character expansion. -->
+                    <xsl:call-template name="infochar2string">
+                        <xsl:with-param name="charList">
+                            <xsl:value-of select="."/>
+                        </xsl:with-param>
+                    </xsl:call-template>
+                </xsl:if>
+            </td>
+        </tr>
+    </xsl:template>
+
+    <xsl:template name="infochar2string"> <!-- Recursive named template: walks charList one character at a time and writes the description looked up for each recognised flag character. -->
+        <xsl:param name="i">1</xsl:param> <!-- 1-based position of the character to examine; defaults to the first character. -->
+        <xsl:param name="charList"/> <!-- String of flag characters to expand. -->
+
+        <xsl:variable name="char"> <!-- The single character at position $i. -->
+            <xsl:value-of select="substring($charList,$i,1)"/>
+        </xsl:variable>
+        <xsl:choose> <!-- Each recognised character writes the text of /response/lst[@name='info']/lst/str[@name=that character] plus a dash separator; there is no otherwise branch, so any other character writes nothing. -->
+            <xsl:when test="$char='I'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='I']"/> - </xsl:when>
+            <xsl:when test="$char='T'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='T']"/> - </xsl:when>
+            <xsl:when test="$char='S'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='S']"/> - </xsl:when>
+            <xsl:when test="$char='M'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='M']"/> - </xsl:when>
+            <xsl:when test="$char='V'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='V']"/> - </xsl:when>
+            <xsl:when test="$char='o'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='o']"/> - </xsl:when>
+            <xsl:when test="$char='p'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='p']"/> - </xsl:when>
+            <xsl:when test="$char='O'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='O']"/> - </xsl:when>
+            <xsl:when test="$char='L'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='L']"/> - </xsl:when>
+            <xsl:when test="$char='B'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='B']"/> - </xsl:when>
+            <xsl:when test="$char='C'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='C']"/> - </xsl:when>
+            <xsl:when test="$char='f'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='f']"/> - </xsl:when>
+            <xsl:when test="$char='l'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='l']"/> -
+            </xsl:when>
+        </xsl:choose>
+
+        <xsl:if test="not($i>=string-length($charList))"> <!-- Recurse while $i is still before the last character; stops at once for an empty charList. -->
+            <xsl:call-template name="infochar2string">
+                <xsl:with-param name="i">
+                    <xsl:value-of select="$i+1"/>
+                </xsl:with-param>
+                <xsl:with-param name="charList">
+                    <xsl:value-of select="$charList"/>
+                </xsl:with-param>
+            </xsl:call-template>
+        </xsl:if>
+    </xsl:template>
+    <xsl:template name="css"> <!-- Named template: writes an inline <style> element with the CSS rules below; the rules sit in a CDATA section so they are taken as literal text. -->
+        <style type="text/css">
+            <![CDATA[
+            td.name {font-style: italic; font-size:80%; }
+            .doc { margin: 0.5em; border: solid grey 1px; }
+            .exp { display: none; font-family: monospace; white-space: pre; }
+            div.histogram { background: none repeat scroll 0%; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial;}
+            table.histogram { width: auto; vertical-align: bottom; }
+            table.histogram td, table.histogram th { text-align: center; vertical-align: bottom; border-bottom: 1px solid #ff9933; width: auto; }
+            ]]>
+        </style>
+    </xsl:template>
+</xsl:stylesheet>
diff --git a/zookeeper/solr/collection1/conf/xslt/updateXml.xsl b/zookeeper/solr/collection1/conf/xslt/updateXml.xsl

new file mode 100644 (file)

index 0000000..daf1344
--- /dev/null
+++ b/zookeeper/solr/collection1/conf/xslt/updateXml.xsl
@@ -0,0 +1,70 @@
+<!-- 
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ -->
+
+<!--
+  Simple transform of Solr query response into Solr Update XML compliant XML.
+  When used in the xslt response writer you will get UpdateXML as output.
+  But you can also store a query response XML to disk and feed this XML to
+  the XSLTUpdateRequestHandler to index the content. Provided as example only.
+  See http://wiki.apache.org/solr/XsltUpdateRequestHandler for more info
+ -->
+<xsl:stylesheet version='1.0' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>
+  <xsl:output media-type="text/xml" method="xml" indent="yes"/>
+
+  <xsl:template match='/'> <!-- Root rule: wraps the whole output in a single <add> element. -->
+    <add>
+        <xsl:apply-templates select="response/result/doc"/> <!-- Only <doc> elements under response/result are processed. -->
+    </add>
+  </xsl:template>
+  
+  <!-- Ignore score (makes no sense to index) -->
+  <xsl:template match="doc/*[@name='score']" priority="100"> <!-- Empty body: a child of <doc> named 'score' produces no output; the explicit priority outranks the generic doc/* rule. -->
+  </xsl:template>
+
+  <xsl:template match="doc"> <!-- Emits one output <doc> per input <doc> and lets the field-level rules fill it. -->
+    <xsl:variable name="pos" select="position()"/> <!-- Position of this <doc> in the current node list. NOTE(review): it is passed on as the "pos" parameter, but no template in this stylesheet declares an xsl:param named pos, so it appears to be unused. -->
+    <doc>
+        <xsl:apply-templates>
+          <xsl:with-param name="pos"><xsl:value-of select="$pos"/></xsl:with-param>
+        </xsl:apply-templates>
+    </doc>
+  </xsl:template>
+
+  <!-- Flatten arrays to duplicate field lines -->
+  <xsl:template match="doc/arr" priority="100"> <!-- Multi-valued field: writes one <field> per array item, all carrying the array's name. -->
+      <xsl:variable name="fn" select="@name"/> <!-- Captured here because the context node changes inside the for-each. -->
+      
+      <xsl:for-each select="*">
+               <xsl:element name="field">
+                   <xsl:attribute name="name"><xsl:value-of select="$fn"/></xsl:attribute>
+               <xsl:value-of select="."/>
+               </xsl:element>
+      </xsl:for-each>
+  </xsl:template>
+
+
+  <xsl:template match="doc/*"> <!-- Generic single-valued field: writes one <field> whose name attribute is the source element's @name and whose content is its string value. -->
+      <xsl:variable name="fn" select="@name"/>
+
+       <xsl:element name="field">
+           <xsl:attribute name="name"><xsl:value-of select="$fn"/></xsl:attribute>
+        <xsl:value-of select="."/>
+       </xsl:element>
+  </xsl:template>
+
+  <xsl:template match="*"/> <!-- Catch-all: any element not matched by a more specific rule above produces no output. -->
+</xsl:stylesheet>
diff --git a/zookeeper/solr/collection1/core.properties b/zookeeper/solr/collection1/core.properties

new file mode 100644 (file)

index 0000000..bc0cf7d
--- /dev/null
+++ b/zookeeper/solr/collection1/core.properties
@@ -0,0 +1 @@
+name=collection1
\ No newline at end of file
diff --git a/zookeeper/solr/solr.xml b/zookeeper/solr/solr.xml

new file mode 100644 (file)

index 0000000..7ae7244
--- /dev/null
+++ b/zookeeper/solr/solr.xml
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+   This is an example of a simple "solr.xml" file for configuring one or 
+   more Solr Cores, as well as allowing Cores to be added, removed, and 
+   reloaded via HTTP requests.
+
+   More information about options available in this configuration file, 
+   and Solr Core administration can be found online:
+   http://wiki.apache.org/solr/CoreAdmin
+-->
+
+<solr>
+
+  <solrcloud>
+    <str name="host">${host:}</str>
+    <int name="hostPort">${jetty.port:8983}</int>
+    <str name="hostContext">${hostContext:solr}</str>
+    <int name="zkClientTimeout">${zkClientTimeout:15000}</int>
+    <bool name="genericCoreNodeNames">${genericCoreNodeNames:true}</bool>
+  </solrcloud>
+
+  <shardHandlerFactory name="shardHandlerFactory"
+    class="HttpShardHandlerFactory">
+    <int name="socketTimeout">${socketTimeout:0}</int>
+    <int name="connTimeout">${connTimeout:0}</int>
+  </shardHandlerFactory>
+
+</solr>
diff --git a/zookeeper/solr/zoo.cfg b/zookeeper/solr/zoo.cfg

new file mode 100644 (file)

index 0000000..aea4518
--- /dev/null
+++ b/zookeeper/solr/zoo.cfg
@@ -0,0 +1,17 @@
+# The number of milliseconds of each tick
+tickTime=2000
+# The number of ticks that the initial
+# synchronization phase can take
+initLimit=10
+# The number of ticks that can pass between
+# sending a request and getting an acknowledgement
+syncLimit=5
+
+# the directory where the snapshot is stored.
+# dataDir=/opt/zookeeper/data
+# NOTE: Solr defaults the dataDir to <solrHome>/zoo_data
+
+# the port at which the clients will connect
+# clientPort=2181
+# NOTE: Solr sets this based on zkRun / zkHost params
+
diff --git a/zookeeper/start.jar b/zookeeper/start.jar

new file mode 100644 (file)

index 0000000..3e47369

Binary files /dev/null and b/zookeeper/start.jar differ
diff --git a/zookeeper/webapps/solr-4.4.0.war b/zookeeper/webapps/solr-4.4.0.war

new file mode 120000 (symlink)

index 0000000..e25a915
--- /dev/null
+++ b/zookeeper/webapps/solr-4.4.0.war
@@ -0,0 +1 @@
+../dist/solr-4.4.0.war
\ No newline at end of file
diff --git a/zookeeper/zookeeper.sh b/zookeeper/zookeeper.sh

new file mode 120000 (symlink)

index 0000000..8f61e24
--- /dev/null
+++ b/zookeeper/zookeeper.sh
@@ -0,0 +1 @@
+../scripts/zookeeper.sh
\ No newline at end of file
diff --git a/zookeeper/zookeeper.sh.pid b/zookeeper/zookeeper.sh.pid

new file mode 100644 (file)

index 0000000..3ebb04f
--- /dev/null
+++ b/zookeeper/zookeeper.sh.pid
@@ -0,0 +1 @@
+18024
author	Dennis Schafroth <dennis@indexdata.com>
	Fri, 29 Nov 2013 11:14:30 +0000 (12:14 +0100)
committer	Dennis Schafroth <dennis@indexdata.com>
	Fri, 29 Nov 2013 11:14:30 +0000 (12:14 +0100)
zookeeper/.#make_same_host_config.sh	[new symlink]	patch \| blob
zookeeper/README.txt	[new file with mode: 0644]	patch \| blob
zookeeper/cloud-scripts/log4j.properties	[new file with mode: 0644]	patch \| blob
zookeeper/cloud-scripts/zkcli.bat	[new file with mode: 0644]	patch \| blob
zookeeper/cloud-scripts/zkcli.sh	[new file with mode: 0755]	patch \| blob
zookeeper/contexts/solr-jetty-context.xml	[new file with mode: 0644]	patch \| blob
zookeeper/etc/create-solrtest.keystore.sh	[new file with mode: 0755]	patch \| blob
zookeeper/etc/jetty.xml	[new file with mode: 0644]	patch \| blob
zookeeper/etc/logging.properties	[new file with mode: 0644]	patch \| blob
zookeeper/etc/solrtest.keystore	[new file with mode: 0644]	patch \| blob
zookeeper/etc/webdefault.xml	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/currency.xml	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/elevate.xml	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_ca.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_fr.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_ga.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/contractions_it.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/hyphenations_ga.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stemdict_nl.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stoptags_ja.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ar.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_bg.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ca.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_cz.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_da.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_de.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_el.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_en.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_es.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_eu.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_fa.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_fi.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_fr.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ga.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_gl.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_hi.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_hu.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_hy.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_id.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_it.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ja.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_lv.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_nl.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_no.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_pt.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ro.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_ru.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_sv.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_th.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/stopwords_tr.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/lang/userdict_ja.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/protwords.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/schema.xml	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/solrconfig.xml	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/stopwords.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/conf/synonyms.txt	[new file with mode: 0644]	patch \| blob
zookeeper/example-schemaless/solr/collection1/core.properties	[new file with mode: 0644]	patch \| blob
zookeeper/exampledocs/books.csv	[new file with mode: 0644]	patch \| blob
zookeeper/exampledocs/books.json	[new file with mode: 0644]	patch \| blob
zookeeper/exampledocs/gb18030-example.xml	[new file with mode: 0644]	patch \| blob
zookeeper/exampledocs/hd.xml	[new file with mode: 0644]	patch \| blob
zookeeper/exampledocs/ipod_other.xml	[new file with mode: 0644]	patch \| blob
zookeeper/exampledocs/ipod_video.xml	[new file with mode: 0644]	patch \| blob
zookeeper/exampledocs/manufacturers.xml	[new file with mode: 0644]	patch \| blob
zookeeper/exampledocs/mem.xml	[new file with mode: 0644]	patch \| blob
zookeeper/exampledocs/money.xml	[new file with mode: 0644]	patch \| blob
zookeeper/exampledocs/monitor.xml	[new file with mode: 0644]	patch \| blob
zookeeper/exampledocs/monitor2.xml	[new file with mode: 0644]	patch \| blob
zookeeper/exampledocs/mp500.xml	[new file with mode: 0644]	patch \| blob
zookeeper/exampledocs/post.jar	[new file with mode: 0644]	patch \| blob
zookeeper/exampledocs/post.sh	[new file with mode: 0755]	patch \| blob
zookeeper/exampledocs/sd500.xml	[new file with mode: 0644]	patch \| blob
zookeeper/exampledocs/solr.xml	[new file with mode: 0644]	patch \| blob
zookeeper/exampledocs/test_utf8.sh	[new file with mode: 0755]	patch \| blob
zookeeper/exampledocs/utf8-example.xml	[new file with mode: 0644]	patch \| blob
zookeeper/exampledocs/vidcard.xml	[new file with mode: 0644]	patch \| blob
zookeeper/lib/ext/jcl-over-slf4j-1.6.6.jar	[new file with mode: 0644]	patch \| blob
zookeeper/lib/ext/jul-to-slf4j-1.6.6.jar	[new file with mode: 0644]	patch \| blob
zookeeper/lib/ext/log4j-1.2.16.jar	[new file with mode: 0644]	patch \| blob
zookeeper/lib/ext/slf4j-api-1.6.6.jar	[new file with mode: 0644]	patch \| blob
zookeeper/lib/ext/slf4j-log4j12-1.6.6.jar	[new file with mode: 0644]	patch \| blob
zookeeper/lib/jetty-continuation-8.1.10.v20130312.jar	[new file with mode: 0644]	patch \| blob
zookeeper/lib/jetty-deploy-8.1.10.v20130312.jar	[new file with mode: 0644]	patch \| blob
zookeeper/lib/jetty-http-8.1.10.v20130312.jar	[new file with mode: 0644]	patch \| blob
zookeeper/lib/jetty-io-8.1.10.v20130312.jar	[new file with mode: 0644]	patch \| blob
zookeeper/lib/jetty-jmx-8.1.10.v20130312.jar	[new file with mode: 0644]	patch \| blob
zookeeper/lib/jetty-security-8.1.10.v20130312.jar	[new file with mode: 0644]	patch \| blob
zookeeper/lib/jetty-server-8.1.10.v20130312.jar	[new file with mode: 0644]	patch \| blob
zookeeper/lib/jetty-servlet-8.1.10.v20130312.jar	[new file with mode: 0644]	patch \| blob
zookeeper/lib/jetty-util-8.1.10.v20130312.jar	[new file with mode: 0644]	patch \| blob
zookeeper/lib/jetty-webapp-8.1.10.v20130312.jar	[new file with mode: 0644]	patch \| blob
zookeeper/lib/jetty-xml-8.1.10.v20130312.jar	[new file with mode: 0644]	patch \| blob
zookeeper/lib/servlet-api-3.0.jar	[new file with mode: 0644]	patch \| blob
zookeeper/multicore/README.txt	[new file with mode: 0644]	patch \| blob
zookeeper/multicore/core0/conf/schema.xml	[new file with mode: 0644]	patch \| blob
zookeeper/multicore/core0/conf/solrconfig.xml	[new file with mode: 0644]	patch \| blob
zookeeper/multicore/core1/conf/schema.xml	[new file with mode: 0644]	patch \| blob
zookeeper/multicore/core1/conf/solrconfig.xml	[new file with mode: 0644]	patch \| blob
zookeeper/multicore/exampledocs/ipod_other.xml	[new file with mode: 0644]	patch \| blob
zookeeper/multicore/exampledocs/ipod_video.xml	[new file with mode: 0644]	patch \| blob
zookeeper/multicore/solr.xml	[new file with mode: 0644]	patch \| blob
zookeeper/multicore/zoo.cfg	[new file with mode: 0644]	patch \| blob
zookeeper/options	[new file with mode: 0644]	patch \| blob
zookeeper/options_2	[new file with mode: 0644]	patch \| blob
zookeeper/resources/log4j.properties	[new file with mode: 0644]	patch \| blob
zookeeper/solr/README.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/README.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/admin-extra.html	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/admin-extra.menu-bottom.html	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/admin-extra.menu-top.html	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/currency.xml	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/elevate.xml	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/contractions_ca.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/contractions_fr.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/contractions_ga.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/contractions_it.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/hyphenations_ga.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stemdict_nl.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stoptags_ja.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_ar.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_bg.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_ca.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_cz.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_da.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_de.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_el.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_en.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_es.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_eu.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_fa.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_fi.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_fr.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_ga.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_gl.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_hi.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_hu.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_hy.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_id.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_it.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_ja.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_lv.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_nl.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_no.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_pt.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_ro.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_ru.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_sv.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_th.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/stopwords_tr.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/lang/userdict_ja.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/mapping-FoldToASCII.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/mapping-ISOLatin1Accent.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/protwords.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/schema.xml	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/scripts.conf	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/solrconfig.xml	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/spellings.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/stopwords.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/synonyms.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/update-script.js	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/README.txt	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/VM_global_library.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/browse.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/cluster.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/cluster_results.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/debug.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/did_you_mean.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/error.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/facet_fields.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/facet_pivot.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/facet_queries.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/facet_ranges.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/facets.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/footer.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/head.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/header.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/hit.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/hit_grouped.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/hit_plain.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/join_doc.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/jquery.autocomplete.css	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/jquery.autocomplete.js	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/layout.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/main.css	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/mime_type_lists.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/pagination_bottom.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/pagination_top.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/product_doc.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/query.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/query_form.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/query_group.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/query_spatial.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/results_list.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/richtext_doc.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/suggest.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/velocity/tabs.vm	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/xslt/example.xsl	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/xslt/example_atom.xsl	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/xslt/example_rss.xsl	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/xslt/luke.xsl	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/conf/xslt/updateXml.xsl	[new file with mode: 0644]	patch \| blob
zookeeper/solr/collection1/core.properties	[new file with mode: 0644]	patch \| blob
zookeeper/solr/solr.xml	[new file with mode: 0644]	patch \| blob
zookeeper/solr/zoo.cfg	[new file with mode: 0644]	patch \| blob
zookeeper/start.jar	[new file with mode: 0644]	patch \| blob
zookeeper/webapps/solr-4.4.0.war	[new symlink]	patch \| blob
zookeeper/zookeeper.sh	[new symlink]	patch \| blob
zookeeper/zookeeper.sh.pid	[new file with mode: 0644]	patch \| blob