elasticsearch
lucene : 倒排索引
如下: 我 (1:1) {0} 表示第一行出现一次,索引位置为0


elasticsearch 部署
elasticsearch-2.2.1.zip
192.168.112.101 node1
192.168.112.102 node2
192.168.112.103 node3
三台机器,每台机器上都部署。
es不能以root用户启动(因为es可以远程执行脚本,对于主机不安全)
## 所以三台主机都创建用户
[root@node2 ~]# useradd sxt
[root@node2 ~]# echo sxt | passwd --stdin sxt
[root@node2 ~]# mkdir -p /opt/sxt/es
[root@node2 ~]# cd /opt/sxt
[root@node1 sxt]# cd /opt/sxt/es/
[root@node1 es]# ll
total 28740
-rw-r--r--. 1 root root 29428075 Sep 10 21:18 elasticsearch-2.2.1.zip
[root@node1 sxt]# chown sxt:sxt es
[root@node1 sxt]# su sxt
[sxt@node1 sxt]$ cd es
[sxt@node1 es]$ ll
total 28740
-rw-r--r--. 1 root root 29428075 Sep 10 21:18 elasticsearch-2.2.1.zip
[sxt@node1 es]$ unzip elasticsearch-2.2.1.zip
[sxt@node1 es]$ vi elasticsearch-2.2.1/config/elasticsearch.yml
## 修改
cluster.name: bjsxt-es
node.name: node1
network.host: 192.168.112.101
discovery.zen.ping.multicast.enabled: false
## 放在末尾
discovery.zen.ping.unicast.hosts: ["192.168.112.101","192.168.112.102", "192.168.112.103"]
discovery.zen.ping_timeout: 120s
client.transport.ping_timeout: 60s
[sxt@node1 es]$ scp -r elasticsearch-2.2.1 sxt@node2:`pwd` ## 分发到node2和node3;分发后需在node2、node3上分别把 node.name 和 network.host 改成各自的主机名和ip
[sxt@node1 bin]$ cd /opt/sxt/es/elasticsearch-2.2.1/bin
[sxt@node1 bin]$ ./elasticsearch ## node2,node3都启动此命令

配置json内容的格式化ui
02_第二阶段 hadoop体系之离线计算\12_EL SEARCH 搜索引擎\01资料\01资料\附件\plugins
将该文件夹下的head上传到
[root@node1 plugins]# pwd
/opt/sxt/es/elasticsearch-2.2.1/plugins
[root@node1 plugins]# ll
total 4
drwxr-xr-x. 6 sxt sxt 4096 Sep 10 21:41 head ## 注意权限:head 的属主应为sxt
[root@node1 plugins]# chown -R sxt:sxt head
## 如果不小心以root用户启动,报错,如下。此时需要删除logs文件夹。否则再次以sxt启动也可能失败。
[root@node1 plugins]# cd /opt/sxt/es/elasticsearch-2.2.1/bin
[root@node1 bin]# ./elasticsearch
Exception in thread "main" java.lang.RuntimeException: don't run elasticsearch as root.
at org.elasticsearch.bootstrap.Bootstrap.initializeNatives(Bootstrap.java:93)
at org.elasticsearch.bootstrap.Bootstrap.setup(Bootstrap.java:144)
at org.elasticsearch.bootstrap.Bootstrap.init(Bootstrap.java:285)
at org.elasticsearch.bootstrap.Elasticsearch.main(Elasticsearch.java:35)
[root@node1 elasticsearch-2.2.1]# rm -rf logs
## 重新启动 ### ctrl+c 结束程序
[root@node1 elasticsearch-2.2.1]# su sxt
[sxt@node1 elasticsearch-2.2.1]$ cd /opt/sxt/es/elasticsearch-2.2.1/bin
[sxt@node1 bin]$ ./elasticsearch
## 访问页面内容如下;
http://node2:9200/_plugin/head/

横向扩展sharding切片,纵向扩展搭建ha. 一般lucene的分片数不可修改,在规划时候需要考虑好,一经确认不可修改。(可以给分片做备份)

通过curl 操作es [root@node1 plugins]# curl -XPUT http://192.168.112.101:9200/bjsxt/ 如下:创建了lucene分片。粗体代表主分片,普通矩形框表示备分片
称为创建索引库 (相当于数据库)

node3挂掉后,出现短暂的警告,过一会儿又重新调整为如下第二图(达到健康状态了,自动备份了)。
再次重启node3.过一会如图第三。 * 代表是主。




curl -XPOST http://192.168.112.101:9200/bjsxt/employee -d '
{
"first_name" : "bin",
"age" : 33,
"about" : "I love to go rock climbing",
"interests": [ "sports", "music" ]
}'
创建type和document.
[root@node1 plugins]# curl -XPUT http://192.168.112.101:9200/bjsxt/
{"acknowledged":true}[root@node1 plugins]# curl -XPOST http://192.168.112.101:9200/bjsxt/employee -d '
> {
> "first_name" : "bin",
> "age" : 33,
> "about" : "I love to go rock climbing",
> "interests": [ "sports", "music" ]
> }'
{"_index":"bjsxt","_type":"employee","_id":"AW0brHsbOCeeN2j3g-hG","_version":1,"_shards":{"total":2,"successful":2,"failed":0},"created":true}[root@node1 plugins]#

curl -XPOST http://192.168.112.101:9200/bjsxt/employee -d '
{
"first_name" : "gob bin",
"age" : 43,
"about" : "I love to go rock climbing",
"interests": [ "sports", "music" ]
}'
curl -XPOST http://192.168.112.101:9200/bjsxt/employee -d '
{
"first_name" : "pablo2",
"age" : 33,
"about" : "I love to go rock climbing",
"interests": [ "sports", "music" ],
"sex": "man"
}'
#XPUT 必须给出id
curl -XPUT http://192.168.112.101:9200/bjsxt/employee/1 -d '
{
"first_name" : "god bin",
"last_name" : "pang",
"age" : 42,
"about" : "I love to go rock climbing",
"interests": [ "sports", "music" ]
}'
## 修改age 44
curl -XPUT http://192.168.112.101:9200/bjsxt/employee/1 -d '
{
"first_name" : "god bin",
"last_name" : "pang",
"age" : 44,
"about" : "I love to go rock climbing",
"interests": [ "sports", "music" ]
}'
curl -XPOST http://192.168.112.101:9200/bjsxt/employee/1 -d '
{
"first_name" : "pablo2",
"age" : 33,
"about" : "I love to go rock climbing",
"interests": [ "sports", "music" ],
"sex": "man"
}'
## XPUT,XPOST 都可以做创建和修改。 XPUT 必须给出id,如果id不存在就创建,存在则修改。
XPOST 不必给定id:不给定时由es自动生成id并新建文档;给定id时,id不存在就创建,存在则修改。
[root@node1 plugins]# curl -XGET http://192.168.112.101:9200/bjsxt/employee/1?pretty
{
"_index" : "bjsxt",
"_type" : "employee",
"_id" : "1",
"_version" : 4,
"found" : true,
"_source" : {
"first_name" : "pablo2",
"age" : 33,
"about" : "I love to go rock climbing",
"interests" : [ "sports", "music" ],
"sex" : "man"
}
}

[root@node1 plugins]# curl -XGET http://192.168.112.101:9200/bjsxt/employee/_search?q=first_name="bin"
{"took":31,"timed_out":false,"_shards":{"total":5,"successful":5,"failed":0},"hits":{"total":2,"max_score":0.079459734,"hits":[{"_index":"bjsxt","_type":"employee","_id":"AW0brHsbOCeeN2j3g-hG","_score":0.079459734,"_source":
{
"first_name" : "bin",
"age" : 33,
"about" : "I love to go rock climbing",
"interests": [ "sports", "music" ]
}},{"_index":"bjsxt","_type":"employee","_id":"AW0brvCeOCeeN2j3g-hH","_score":0.01125201,"_source":
{
"first_name" : "gob bin",
"age" : 43,
"about" : "I love to go rock climbing",
"interests": [ "sports", "music" ]
}}]}}
[root@node1 plugins]# curl -XGET http://192.168.112.101:9200/bjsxt/employee/_search?pretty -d '
> {
> "query":
> {"match":
> {"first_name":"bin"}
> }
> }'
{
"took" : 13,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 2,
"max_score" : 1.0,
"hits" : [ {
"_index" : "bjsxt",
"_type" : "employee",
"_id" : "AW0brHsbOCeeN2j3g-hG",
"_score" : 1.0,
"_source" : {
"first_name" : "bin",
"age" : 33,
"about" : "I love to go rock climbing",
"interests" : [ "sports", "music" ]
}
}, {
"_index" : "bjsxt",
"_type" : "employee",
"_id" : "AW0brvCeOCeeN2j3g-hH",
"_score" : 0.19178301,
"_source" : {
"first_name" : "gob bin",
"age" : 43,
"about" : "I love to go rock climbing",
"interests" : [ "sports", "music" ]
}
} ]
}
}
[root@node1 plugins]# curl -XGET http://192.168.112.101:9200/bjsxt/employee/_search?pretty -d '
> {
> "query":
> {"multi_match":
> {
> "query":"bin",
> "fields":["last_name","first_name"],
> "operator":"and"
> }
> }
> }'
{
"took" : 13,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 2,
"max_score" : 0.5906161,
"hits" : [ {
"_index" : "bjsxt",
"_type" : "employee",
"_id" : "AW0brHsbOCeeN2j3g-hG",
"_score" : 0.5906161,
"_source" : {
"first_name" : "bin",
"age" : 33,
"about" : "I love to go rock climbing",
"interests" : [ "sports", "music" ]
}
}, {
"_index" : "bjsxt",
"_type" : "employee",
"_id" : "AW0brvCeOCeeN2j3g-hH",
"_score" : 0.058849156,
"_source" : {
"first_name" : "gob bin",
"age" : 43,
"about" : "I love to go rock climbing",
"interests" : [ "sports", "music" ]
}
} ]
}
}
[root@node1 plugins]# curl -XGET http://192.168.112.101:9200/bjsxt/employee/_search?pretty -d '
> {
> "query":
> {"bool" :
> {
> "must" :
> {"match":
> {"first_name":"bin"}
> },
> "must" :
> {"match":
> {"age":33}
> }
> }
> }
> }'
{
"took" : 10,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 1.163388,
"hits" : [ {
"_index" : "bjsxt",
"_type" : "employee",
"_id" : "AW0brHsbOCeeN2j3g-hG",
"_score" : 1.163388,
"_source" : {
"first_name" : "bin",
"age" : 33,
"about" : "I love to go rock climbing",
"interests" : [ "sports", "music" ]
}
} ]
}
}
[root@node1 plugins]# curl -XGET http://192.168.112.101:9200/bjsxt/employee/_search?pretty -d '
> {
> "query":
> {"bool" :
> {
> "must" :
> {"match":
> {"first_name":"bin"}
> },
> "must_not" :
> {"match":
> {"age":33}
> }
> }
> }
> }'
{
"took" : 8,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 0.19178301,
"hits" : [ {
"_index" : "bjsxt",
"_type" : "employee",
"_id" : "AW0brvCeOCeeN2j3g-hH",
"_score" : 0.19178301,
"_source" : {
"first_name" : "gob bin",
"age" : 43,
"about" : "I love to go rock climbing",
"interests" : [ "sports", "music" ]
}
} ]
}
}
[root@node1 plugins]# curl -XGET http://192.168.112.101:9200/bjsxt/employee/_search?pretty -d '
> {
> "query":
> {"bool" :
> {
> "must_not" :
> {"match":
> {"first_name":"bin"}
> },
> "must_not" :
> {"match":
> {"age":33}
> }
> }
> }
> }'
{
"took" : 10,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 0,
"max_score" : null,
"hits" : [ ]
}
}
以集合的方式思考
[root@node1 plugins]# curl -XGET http://192.168.112.101:9200/bjsxt/employee/_search -d '
> {
> "query":
> {"bool" :
> {
> "must" :
> {"term" :
> { "first_name" : "bin" }
> }
> ,
> "must_not" :
> {"range":
> {"age" : { "from" : 20, "to" : 33 }
> }
> }
> }
> }
> }'
{"took":17,"timed_out":false,"_shards":{"total":5,"successful":5,"failed":0},"hits":{"total":1,"max_score":0.19178301,"hits":[{"_index":"bjsxt","_type":"employee","_id":"AW0brvCeOCeeN2j3g-hH","_score":0.19178301,"_source":
{
"first_name" : "gob bin",
"age" : 43,
"about" : "I love to go rock climbing",
"interests": [ "sports", "music" ]
}}]}}
curl -XPUT 'http://192.168.112.101:9200/test2/' -d'{"settings":{"number_of_replicas":2}}'

curl -XPUT 'http://192.168.112.101:9200/test3/' -d'{"settings":{"number_of_shards":3,"number_of_replicas":3}}'

file
segment(段,多个document组成)
document(一条记录,一个对象实例)
field(对象的属性)
term(项,分词之后的词条)
# yes
curl -XPUT http://192.168.133.6:9200/bjsxt/
# yes
curl -XDELETE http://192.168.133.6:9200/test2/
curl -XDELETE http://192.168.133.6:9200/test3/
#document:yes
curl -XPOST http://192.168.133.6:9200/bjsxt/employee -d '
{
"first_name" : "bin",
"age" : 33,
"about" : "I love to go rock climbing",
"interests": [ "sports", "music" ]
}'
curl -XPOST http://192.168.133.6:9200/bjsxt/employee -d '
{
"first_name" : "gob bin",
"age" : 43,
"about" : "I love to go rock climbing",
"interests": [ "sports", "music" ]
}'
curl -XPOST http://192.168.133.6:9200/bjsxt/employee/2 -d '
{
"first_name" : "bin",
"age" : 45,
"about" : "I love to go rock climbing",
"interests": [ "sports", "music" ]
}'
#add field yes
curl -XPOST http://192.168.133.6:9200/bjsxt/employee -d '
{
"first_name" : "pablo2",
"age" : 33,
"about" : "I love to go rock climbing",
"interests": [ "sports", "music" ],
"sex": "man"
}'
curl -XPOST http://192.168.133.6:9200/bjsxt/employee/1 -d '
{
"first_name" : "pablo2",
"age" : 35,
"about" : "I love to go rock climbing",
"interests": [ "sports", "music" ],
"sex": "man"
}'
----------------------------------------
#put:yes
curl -XPUT http://192.168.133.6:9200/bjsxt/employee/1 -d '
{
"first_name" : "god bin",
"last_name" : "pang",
"age" : 42,
"about" : "I love to go rock climbing",
"interests": [ "sports", "music" ]
}'
curl -XPUT http://192.168.133.6:9200/bjsxt/employee -d '
{
"first_name" : "god bin",
"last_name" : "bin",
"age" : 45,
"about" : "I love to go rock climbing",
"interests": [ "sports", "music" ]
}'
curl -XPUT http://192.168.133.6:9200/bjsxt/employee/2 -d '
{
"first_name" : "god bin",
"last_name" : "bin",
"age" : 45,
"about" : "I love to go rock climbing",
"interests": [ "sports", "music" ]
}'
curl -XPUT http://192.168.133.6:9200/bjsxt/employee/1 -d '
{
"first_name" : "god bin",
"last_name" : "pang",
"age" : 40,
"about" : "I love to go rock climbing",
"interests": [ "sports", "music" ]
}'
#根据document的id来获取数据:(?pretty 用于格式化输出,可省略)
curl -XGET http://192.168.133.6:9200/bjsxt/employee/1?pretty
#根据field来查询数据:(URI搜索中按字段查询的标准写法是 q=字段:值,即 q=first_name:bin)
curl -XGET http://192.168.133.6:9200/bjsxt/employee/_search?q=first_name="bin"
#根据field来查询数据:match
curl -XGET http://192.168.133.6:9200/bjsxt/employee/_search?pretty -d '
{
"query":
{"match":
{"first_name":"bin"}
}
}'
#对多个field发起查询:multi_match
curl -XGET http://192.168.133.6:9200/bjsxt/employee/_search?pretty -d '
{
"query":
{"multi_match":
{
"query":"bin",
"fields":["last_name","first_name"],
"operator":"and"
}
}
}'
#多个term对多个field发起查询:bool(boolean)
# 组合查询,must,must_not,should
# must + must : 交集
# must +must_not :差集
# should+should : 并集
curl -XGET http://192.168.133.6:9200/bjsxt/employee/_search?pretty -d '
{
"query":
{"bool" :
{
"must" :
{"match":
{"first_name":"bin"}
},
"must" :
{"match":
{"age":33}
}
}
}
}'
curl -XGET http://192.168.133.6:9200/bjsxt/employee/_search?pretty -d '
{
"query":
{"bool" :
{
"must" :
{"match":
{"first_name":"bin"}
},
"must_not" :
{"match":
{"age":33}
}
}
}
}'
curl -XGET http://192.168.133.6:9200/bjsxt/employee/_search?pretty -d '
{
"query":
{"bool" :
{
"must_not" :
{"match":
{"first_name":"bin"}
},
"must_not" :
{"match":
{"age":33}
}
}
}
}'
##查询first_name=bin,并且年龄不在20岁到33岁之间的(must + must_not:差集)
curl -XGET http://192.168.133.6:9200/bjsxt/employee/_search -d '
{
"query":
{"bool" :
{
"must" :
{"term" :
{ "first_name" : "bin" }
}
,
"must_not" :
{"range":
{"age" : { "from" : 20, "to" : 33 }
}
}
}
}
}'
#创建索引库时指定配置(分片数 number_of_shards、副本数 number_of_replicas)
curl -XPUT 'http://192.168.133.6:9200/test2/' -d'{"settings":{"number_of_replicas":2}}'
curl -XPUT 'http://192.168.133.6:9200/test3/' -d'{"settings":{"number_of_shards":3,"number_of_replicas":3}}'
curl -XPUT 'http://192.168.133.6:9200/test4/' -d'{"settings":{"number_of_shards":6,"number_of_replicas":4}}'
curl -XPOST http://192.168.9.11:9200/bjsxt/person/_mapping -d'
{
"person": {
"properties": {
"content": {
"type": "string",
"store": "no",
"term_vector": "with_positions_offsets",
"analyzer": "ik_max_word",
"search_analyzer": "ik_max_word",
"include_in_all": "true",
"boost": 8
}
}
}
}'
官网 https://www.elastic.co/guide/index.html https://www.elastic.co/guide/en/elasticsearch/client/index.html https://www.elastic.co/guide/en/elasticsearch/client/java-api/index.html https://www.elastic.co/guide/en/elasticsearch/client/java-api/2.2/transport-client.html
爬取数据,作为document的原始文件。在linux上
yum install wget
## 如下命令爬取 http://news.cctv.com;并且按照原有网站的url目录存储到data下
wget -o /tmp/wget.log -P /root/data --no-parent --no-verbose -m -D news.cctv.com -N --convert-links --random-wait -A html,HTML,shtml,SHTML http://news.cctv.com
配置分词器
https://github.com/medcl/elasticsearch-analysis-ik
版本必须与es相对应
elasticsearch-2.2.1.zip
elasticsearch-analysis-ik-1.8.0.zip ##
[sxt@node1 ik]$ pwd
/opt/sxt/es/elasticsearch-2.2.1/plugins/ik ## 修改如下配置文件
[sxt@node1 ik]$ cat plugin-descriptor.properties | grep version=
elasticsearch.version=2.2.1 ## 版本号也修改对应。
## 启动es.
## 运行java程序 createIndex
package com.sxt.es;
import java.io.File;
import java.net.InetAddress;
import java.util.HashMap;
import java.util.Map;
import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsResponse;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.Requests;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.index.query.MultiMatchQueryParser;
import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.junit.Test;
import org.springframework.stereotype.Service;
import com.sxt.util.HtmlTool;
@Service
public class IndexService {

    // Directory that holds the HTML files to be indexed.
    // public static String DATA_DIR="C:\\data\\";
    public static String DATA_DIR="d:\\data\\";

    /**
     * Transport client shared by every method of this class. It is built once
     * in the static initializer below; if that fails the stack trace is printed
     * and this field stays null.
     */
    public static Client client;

    /** Name of the index every method of this class works on. */
    private static final String INDEX_NAME = "bjsxt";

    /** Document type used for the indexed HTML pages. */
    private static final String TYPE_NAME = "htmlbean";

    /** Number of hits requested per search call. */
    private static final int PAGE_SIZE = 10;

    static {
        Settings settings = Settings.settingsBuilder()
                .put("cluster.name", "bjsxt-es").build();
        try {
            client = TransportClient
                    .builder()
                    .settings(settings)
                    .build()
                    .addTransportAddress(
                            new InetSocketTransportAddress(InetAddress
                                    .getByName("node1"), 9300))
                    .addTransportAddress(
                            new InetSocketTransportAddress(InetAddress
                                    .getByName("node2"), 9300))
                    .addTransportAddress(
                            new InetSocketTransportAddress(InetAddress
                                    .getByName("node3"), 9300));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * (Re)creates the "bjsxt" index and registers the "htmlbean" mapping.
     *
     * If the index already exists it is deleted first, so all previously
     * indexed documents are lost. The mapping declares "title" and "content"
     * as stored string fields that use the ik_max_word analyzer for both
     * indexing and searching.
     *
     * Index administration goes through client.admin().indices();
     * document-level operations go through client.prepareXxx().
     *
     * @throws Exception if building the mapping JSON or any cluster call fails
     */
    @Test
    public void createIndex() throws Exception {
        IndicesExistsResponse resp = client.admin().indices()
                .prepareExists(INDEX_NAME).execute().actionGet();
        if (resp.isExists()) {
            client.admin().indices().prepareDelete(INDEX_NAME).execute().actionGet();
        }
        client.admin().indices().prepareCreate(INDEX_NAME).execute().actionGet();

        XContentBuilder builder = XContentFactory.jsonBuilder().startObject()
                .startObject(TYPE_NAME).startObject("properties")
                .startObject("title").field("type", "string")
                .field("store", "yes").field("analyzer", "ik_max_word")
                .field("search_analyzer", "ik_max_word").endObject()
                .startObject("content").field("type", "string")
                .field("store", "yes").field("analyzer", "ik_max_word")
                .field("search_analyzer", "ik_max_word").endObject()
                // "url" is deliberately left out of the explicit mapping:
                // .startObject("url").field("type", "string")
                // .field("store", "yes").field("analyzer", "ik_max_word")
                // .field("search_analyzer", "ik_max_word").endObject()
                .endObject().endObject().endObject();

        PutMappingRequest mapping = Requests.putMappingRequest(INDEX_NAME)
                .type(TYPE_NAME).source(builder);
        client.admin().indices().putMapping(mapping).actionGet();
    }

    /**
     * Adds every HTML file found under {@link #DATA_DIR} to the index.
     */
    @Test
    public void addHtmlToES(){
        readHtml(new File(DATA_DIR));
    }

    /**
     * Recursively walks {@code file}; every non-directory entry is parsed with
     * HtmlTool.parserHtml and, when a bean is returned, indexed as one
     * "htmlbean" document with the fields title, content and url.
     *
     * Failures on a single file are printed and skipped so that one bad file
     * does not abort the whole import.
     *
     * @param file a directory to traverse or a single file to index
     */
    public void readHtml(File file){
        if (file.isDirectory()) {
            File[] children = file.listFiles();
            if (children == null) {
                // listFiles() returns null when the directory cannot be read
                // (I/O error, missing permission); skip it instead of failing
                // with a NullPointerException.
                System.err.println("Cannot list directory: " + file.getPath());
                return;
            }
            for (File child : children) {
                readHtml(child);
            }
            return;
        }

        try {
            HtmlBean bean = HtmlTool.parserHtml(file.getPath());
            if (bean != null) {
                Map<String, String> dataMap = new HashMap<String, String>();
                dataMap.put("title", bean.getTitle());
                dataMap.put("content", bean.getContent());
                dataMap.put("url", bean.getUrl());
                // Write the document to the index.
                client.prepareIndex(INDEX_NAME, TYPE_NAME).setSource(dataMap).execute().actionGet();
            }
        } catch (Throwable e) {
            // Best effort: report the failure and continue with the next file.
            e.printStackTrace();
        }
    }

    /**
     * Runs a multi-match query for {@code kw} against "title" and "content"
     * and returns one page of results with highlighted fragments.
     *
     * An equivalent query could be built as a bool query with two "should"
     * match clauses (one per field); multi_match is used instead.
     *
     * @param kw    the search keywords
     * @param num   the page index; page 1 always starts at offset 0
     * @param count the total hit count known from a previous call; it is only
     *              handed to the PageBean for pages other than the first,
     *              before the start row is read from it
     * @return the page bean holding the result beans and the total hit count
     */
    public PageBean<HtmlBean> search(String kw,int num,int count){
        PageBean<HtmlBean> wr = new PageBean<HtmlBean>();
        wr.setIndex(num);

        MultiMatchQueryBuilder q = new MultiMatchQueryBuilder(kw, new String[]{"title","content"});

        // Only the offset differs between the first page and later pages.
        // The order setTotalCount -> getStartRow is kept from the original
        // code; NOTE(review): PageBean presumably derives the start row from
        // the index and total count - confirm against PageBean.
        int from = 0;
        if (wr.getIndex() != 1) {
            wr.setTotalCount(count);
            from = wr.getStartRow();
        }

        SearchResponse resp = client.prepareSearch(INDEX_NAME)
                .setTypes(TYPE_NAME)
                .setQuery(q)
                .addHighlightedField("title")
                .addHighlightedField("content")
                .setHighlighterPreTags("<font color=\"red\">")
                .setHighlighterPostTags("</font>")
                .setHighlighterFragmentSize(40)   // length of a single highlighted fragment
                .setHighlighterNumOfFragments(5)  // max fragments per hit; joined with "..." below
                .setFrom(from)
                .setSize(PAGE_SIZE)
                .execute().actionGet();

        SearchHits hits = resp.getHits();
        wr.setTotalCount((int) hits.getTotalHits());

        for (SearchHit hit : hits.getHits()) {
            HtmlBean bean = new HtmlBean();

            Text[] titleFragments = highlightFragments(hit, "title");
            if (titleFragments == null) {
                // The keyword does not occur in the title: use the plain title.
                bean.setTitle(sourceText(hit, "title"));
            } else {
                bean.setTitle(titleFragments[0].toString());
            }

            Text[] contentFragments = highlightFragments(hit, "content");
            if (contentFragments == null) {
                // The keyword does not occur in the content: use the plain content.
                bean.setContent(sourceText(hit, "content"));
            } else {
                StringBuilder sb = new StringBuilder();
                for (Text text : contentFragments) {
                    sb.append(text.toString()).append("...");
                }
                bean.setContent(sb.toString());
            }

            bean.setUrl("http://" + sourceText(hit, "url"));
            wr.setBean(bean);
        }
        return wr;
    }

    /**
     * Returns the highlighted fragments of {@code field} for this hit, or
     * null when the field was not highlighted or has no fragments.
     */
    private static Text[] highlightFragments(SearchHit hit, String field) {
        if (hit.getHighlightFields().get(field) == null) {
            return null;
        }
        Text[] fragments = hit.getHighlightFields().get(field).getFragments();
        return (fragments == null || fragments.length == 0) ? null : fragments;
    }

    /**
     * Returns the value of {@code field} from the hit's source as a string,
     * or an empty string when the source or the field is missing, so that a
     * document without that field cannot break the whole result page.
     */
    private static String sourceText(SearchHit hit, String field) {
        Map<String, Object> source = hit.getSource();
        Object value = (source == null) ? null : source.get(field);
        return (value == null) ? "" : value.toString();
    }

    // @Test
    // public void del(){
    //// client.admin().indices().prepareDelete("bjsxt").execute().actionGet();
    // client.admin().indices().prepareDelete("bjsxt2").execute().actionGet();
    // }
}
## 将linux wget 爬取到的数据存放到 D:\data\ 下(对应代码中的 DATA_DIR)。
## 运行addHtmlToES()方法,数据文档添加到es中
## 如下是对项目:ES_SEARCH的演示效果。

Windows 查看端口和pid,杀死pid
C:\WINDOWS\system32>netstat -ano | findstr 8080
TCP 0.0.0.0:8080 0.0.0.0:0 LISTENING 9448
TCP [::]:8080 [::]:0 LISTENING 9448
C:\WINDOWS\system32>taskkill /PID 9448 /F


浙公网安备 33010602011771号