分享

solr 5.5.1安装并配置中文分词IKAnalyzer

 soft_xiang 2016-06-23

solr 5.5.1安装并配置中文分词IKAnalyzer

下载

solr 
下载地址:http://mirrors.tuna.tsinghua.edu.cn/apache/lucene/solr/ 
主页:http://lucene.apache.org/solr/ 
快速指引:http://lucene.apache.org/solr/quickstart.html 
wiki:http://wiki.apache.org/solr/FrontPage

ik-analyzer for solr5 
https://github.com/EugenePig/ik-analyzer-solr5

安装

solr 
直接解压即可(5.x 需要 JDK 1.7 及以上,6.x 需要 JDK 1.8 及以上)

ik 
下载后需要自己编译(ik-analyzer-solr5-master\README.md 中有说明) 
 
编译后文件:ik-analyzer-solr5-5.x.jar 
我已经编译了,jdk1.7编译 
地址:http://download.csdn.net/detail/soft_xiang/9557495

运行solr

solr-5.5.1\bin

运行solr start启动默认的solr服务(没有core,需要新增) 
solr start -e techproducts可以启动带example的服务 
其他更多example可以查看 
 
更多启动参数可以使用solr start -help查看 
启动成功后如下图: 
 
访问地址:http://localhost:8983/solr

去掉访问地址中的项目名

  1. 修改contextPath为/
  2. 修改"\solr-5.5.1\server\etc\jetty.xml"的RewriteHandler 

新增core

  1. 在\solr-5.5.1\server\solr下新增文件夹,如mydemo
  2. 在新增的文件夹中新增conf、data两个文件夹
  3. 将\solr-5.5.1\server\solr\configsets\basic_configs\conf下所有文件复制到新增的conf文件夹下(或者从其他example中复制)
  4. 在core admin中新增,如下图

集成ik-analyzer

  1. 关闭solr
  2. 将ik-analyzer-solr5-5.x.jar放入目录/solr-5.5.1/server/solr-webapp/webapp/WEB-INF/lib中
  3. 配置"\solr-5.5.1\example\techproducts\solr\techproducts\conf\managed-schema" 
    删除部分无用的fieldType、dynamicField等 
    添加ik支持
  <!-- IK Analyzer field type, tokenizer-factory form.
       useSmart is passed as an argument to IKTokenizerFactory, so index time and
       query time can be configured independently (false at index time, true at query time). -->
  <fieldType name="text_ik" class="solr.TextField">
    <analyzer type="index">
      <tokenizer class="org.wltea.analyzer.lucene.IKTokenizerFactory" useSmart="false"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="org.wltea.analyzer.lucene.IKTokenizerFactory" useSmart="true"/>
    </analyzer>
  </fieldType>

or

  <!-- IK Analyzer field type, analyzer-class form.
       Solr creates an <analyzer class="..."> through the class's no-argument constructor and
       does not forward other attributes to it, so useSmart cannot be configured in this form:
       both analyzers run with IKAnalyzer's built-in default mode.
       NOTE(review): the default is presumably useSmart=false (fine-grained); confirm against the jar.
       Use the IKTokenizerFactory form when index and query need different useSmart values. -->
  <fieldType name="text_ik" class="solr.TextField">
    <analyzer type="index" class="org.wltea.analyzer.lucene.IKAnalyzer"/>
    <analyzer type="query" class="org.wltea.analyzer.lucene.IKAnalyzer"/>
  </fieldType>

修改field name="_text_"为ik,如图 
修改text 

缩减后的文件内容见文章最后

  4. 测试 
    重启solr 
    然后进入analysis页面测试 

缩减后的managed-schema文件内容:

  <?xml version="1.0" encoding="UTF-8"?>
  <!-- Solr managed schema - automatically generated - DO NOT EDIT -->
  <schema name="example-data-driven-schema" version="1.6">
    <uniqueKey>id</uniqueKey>

    <!-- Primitive field types; the plural-named variants are the multiValued counterparts. -->
    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
    <fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
    <fieldType name="date" class="solr.TrieDateField" positionIncrementGap="0" precisionStep="0"/>
    <fieldType name="dates" class="solr.TrieDateField" positionIncrementGap="0" multiValued="true" precisionStep="0"/>
    <fieldType name="double" class="solr.TrieDoubleField" positionIncrementGap="0" precisionStep="0"/>
    <fieldType name="doubles" class="solr.TrieDoubleField" positionIncrementGap="0" multiValued="true" precisionStep="0"/>
    <fieldType name="float" class="solr.TrieFloatField" positionIncrementGap="0" precisionStep="0"/>
    <fieldType name="floats" class="solr.TrieFloatField" positionIncrementGap="0" multiValued="true" precisionStep="0"/>
    <fieldType name="ignored" class="solr.StrField" indexed="false" stored="false" multiValued="true"/>
    <fieldType name="int" class="solr.TrieIntField" positionIncrementGap="0" precisionStep="0"/>
    <fieldType name="ints" class="solr.TrieIntField" positionIncrementGap="0" multiValued="true" precisionStep="0"/>
    <fieldType name="long" class="solr.TrieLongField" positionIncrementGap="0" precisionStep="0"/>
    <fieldType name="longs" class="solr.TrieLongField" positionIncrementGap="0" multiValued="true" precisionStep="0"/>
    <fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
    <fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true"/>
    <fieldType name="tdate" class="solr.TrieDateField" positionIncrementGap="0" precisionStep="6"/>
    <fieldType name="tdates" class="solr.TrieDateField" positionIncrementGap="0" multiValued="true" precisionStep="6"/>
    <fieldType name="tfloat" class="solr.TrieFloatField" positionIncrementGap="0" precisionStep="8"/>
    <fieldType name="tfloats" class="solr.TrieFloatField" positionIncrementGap="0" multiValued="true" precisionStep="8"/>
    <fieldType name="tint" class="solr.TrieIntField" positionIncrementGap="0" precisionStep="8"/>
    <fieldType name="tints" class="solr.TrieIntField" positionIncrementGap="0" multiValued="true" precisionStep="8"/>
    <fieldType name="tlong" class="solr.TrieLongField" positionIncrementGap="0" precisionStep="8"/>
    <fieldType name="tlongs" class="solr.TrieLongField" positionIncrementGap="0" multiValued="true" precisionStep="8"/>
    <fieldType name="tdouble" class="solr.TrieDoubleField" positionIncrementGap="0" precisionStep="8"/>
    <fieldType name="tdoubles" class="solr.TrieDoubleField" positionIncrementGap="0" multiValued="true" precisionStep="8"/>

    <!-- General-purpose text: standard tokenizer, stop words, lower-casing; synonyms at query time only. -->
    <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
        <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- Keeps only the token types listed in email_url_types.txt (useWhitelist="true"). -->
    <fieldType name="text_email_url" class="solr.TextField">
      <analyzer>
        <tokenizer class="solr.UAX29URLEmailTokenizerFactory"/>
        <filter class="solr.TypeTokenFilterFactory" types="email_url_types.txt" useWhitelist="true"/>
      </analyzer>
    </fieldType>

    <!-- Three-word shingles at index time; the query side is kept as a single lower-cased token. -->
    <fieldType name="text_shingles" class="solr.TextField" positionIncrementGap="100" multiValued="true">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <!-- <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="false" /> -->
        <filter class="solr.LengthFilterFactory" min="2" max="18"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.PatternReplaceFilterFactory" pattern="(^[^a-z]+$)" replacement="" replace="all"/>
        <filter class="solr.ShingleFilterFactory" minShingleSize="3" maxShingleSize="3"
                outputUnigrams="false" outputUnigramsIfNoShingles="false" tokenSeparator=" " fillerToken="*"/>
        <filter class="solr.PatternReplaceFilterFactory" pattern="(.*[\*].*)" replacement=""/>
        <filter class="solr.TrimFilterFactory"/>
        <!-- PRFF could have removed everything down to an empty string, remove if so -->
        <filter class="solr.LengthFilterFactory" min="1" max="100"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- Chinese word segmentation with IK Analyzer.
         The tokenizer-factory form is used because useSmart is a factory argument; attributes placed
         on <analyzer class="..."> are not forwarded to the analyzer instance, so useSmart would be
         ignored there. -->
    <fieldType name="text_ik" class="solr.TextField">
      <analyzer type="index">
        <tokenizer class="org.wltea.analyzer.lucene.IKTokenizerFactory" useSmart="false"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="org.wltea.analyzer.lucene.IKTokenizerFactory" useSmart="true"/>
      </analyzer>
    </fieldType>

    <field name="id" type="string" multiValued="false" indexed="true" required="true" stored="true"/>
    <field name="_version_" type="long" indexed="true" stored="true"/>
    <field name="content_type" type="string" indexed="true" stored="true"/>
    <field name="doc_type" type="string" indexed="true" stored="true"/>
    <field name="title" type="string" indexed="true" stored="true"/>
    <field name="language" type="string" indexed="true" stored="true"/>
    <field name="content" type="text_general" multiValued="false" indexed="true" stored="true"/>
    <field name="text_shingles" type="text_shingles" indexed="true" stored="false"/>
    <!-- default _text_
    <field name="_text_" type="text_general" multiValued="true" indexed="true" stored="false"/>
    -->
    <!-- Catch-all field analysed with IK. It is the destination of <copyField source="*">, so it
         receives several values per document and must be multiValued; indexed/stored mirror the
         default definition kept in the comment above. -->
    <field name="_text_" type="text_ik" multiValued="true" indexed="true" stored="false"/>

    <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
    <dynamicField name="attr_*" type="text_general" multiValued="true" indexed="true" stored="true"/>
    <dynamicField name="*_txt" type="text_general" indexed="true" stored="true"/>
    <dynamicField name="*_dts" type="date" multiValued="true" indexed="true" stored="true"/>
    <dynamicField name="*_is" type="ints" indexed="true" stored="true"/>
    <dynamicField name="*_ss" type="strings" indexed="true" stored="true"/>
    <dynamicField name="*_ls" type="longs" indexed="true" stored="true"/>
    <dynamicField name="*_bs" type="booleans" indexed="true" stored="true"/>
    <dynamicField name="*_fs" type="floats" indexed="true" stored="true"/>
    <dynamicField name="*_ds" type="doubles" indexed="true" stored="true"/>
    <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
    <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
    <dynamicField name="*_s" type="string" indexed="true" stored="true"/>
    <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
    <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>
    <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
    <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
    <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
    <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
    <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
    <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
    <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
    <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
    <dynamicField name="*_tis" type="tints" indexed="true" stored="true"/>
    <dynamicField name="*_tls" type="tlongs" indexed="true" stored="true"/>
    <dynamicField name="*_tfs" type="tfloats" indexed="true" stored="true"/>
    <dynamicField name="*_tds" type="tdoubles" indexed="true" stored="true"/>
    <dynamicField name="*_tdts" type="tdates" indexed="true" stored="true"/>

    <copyField source="content" dest="text_shingles"/>
    <copyField source="*" dest="_text_"/>
  </schema>

    本站是提供个人知识管理的网络存储空间,所有内容均由用户发布,不代表本站观点。请注意甄别内容中的联系方式、诱导购买等信息,谨防诈骗。如发现有害或侵权内容,请点击一键举报。
    转藏 分享 献花(0)

    0条评论

    发表

    请遵守用户 评论公约