IndexService

package com.sxt.es;

import java.io.File;
import java.net.InetAddress;
import java.util.HashMap;
import java.util.Map;

import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsResponse;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.Requests;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.index.query.MultiMatchQueryParser;
import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.junit.Test;
import org.springframework.stereotype.Service;

import com.sxt.util.HtmlTool;

@Service
public class IndexService {

	//存放html文件的目录
	public static String DATA_DIR="e:\\data\\";
	
	public static Client client;

	static {
		Settings settings = Settings.settingsBuilder()
				.put("cluster.name", "bjsxtcluster").build();
		try {
			client = TransportClient
					.builder()
					.settings(settings)
					.build()
					.addTransportAddress(
							new InetSocketTransportAddress(InetAddress
									.getByName("node01"), 9300))
					.addTransportAddress(
							new InetSocketTransportAddress(InetAddress
									.getByName("node02"), 9300))
					.addTransportAddress(
							new InetSocketTransportAddress(InetAddress
									.getByName("node03"), 9300));
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * admin():管理索引库的。client.admin().indices()
	 * 
	 * 索引数据的管理:client.prepare
	 * 
	 */
	@Test
	public void createIndex() throws Exception {
		IndicesExistsResponse resp = client.admin().indices().prepareExists("bjsxt").execute().actionGet();
		if(resp.isExists()){
			client.admin().indices().prepareDelete("bjsxt").execute().actionGet();
		}
		client.admin().indices().prepareCreate("bjsxt").execute().actionGet();

		new XContentFactory();

		XContentBuilder builder = XContentFactory.jsonBuilder().startObject()
				.startObject("htmlbean").startObject("properties")
				.startObject("title").field("type", "string")
				.field("store", "yes").field("analyzer", "ik_max_word")
				.field("search_analyzer", "ik_max_word").endObject()
				.startObject("content").field("type", "string")
				.field("store", "yes").field("analyzer", "ik_max_word")
				.field("search_analyzer", "ik_max_word").endObject()
//				.startObject("url").field("type", "string")
//				.field("store", "yes").field("analyzer", "ik_max_word")
//				.field("search_analyzer", "ik_max_word").endObject()
				.endObject().endObject().endObject();
		PutMappingRequest mapping = Requests.putMappingRequest("bjsxt").type("htmlbean").source(builder);
		client.admin().indices().putMapping(mapping).actionGet();

	}
	
	/**
	 * 把源数据html文件添加到索引库中(构建索引文件)
	 */
	@Test
	public void addHtmlToES(){
		readHtml(new File(DATA_DIR));
	}
	
	/**
	 * 遍历数据文件目录d:/data ,递归方法
	 * @param file
	 */
	public void readHtml(File file){
		if(file.isDirectory()){
			File[]  fs =file.listFiles();
			for (int i = 0; i < fs.length; i++) {
				File f = fs[i];
				readHtml(f);
			}
		}else{
			HtmlBean bean;
			try {
				bean = HtmlTool.parserHtml(file.getPath());
				if(bean!=null){
					Map<String, String> dataMap =new HashMap<String, String>();
					dataMap.put("title", bean.getTitle());
					dataMap.put("content", bean.getContent());
					dataMap.put("url", bean.getUrl());
					//写索引
					client.prepareIndex("bjsxt", "htmlbean").setSource(dataMap).execute().actionGet();
				}
			} catch (Throwable e) {
				e.printStackTrace();
			}
			
		}
	}
	
	/**
	 * 搜索
	 * @param kw
	 * @param num
	 * @return
	 */
	public PageBean<HtmlBean> search(String kw,int num,int count){
		PageBean<HtmlBean> wr =new PageBean<HtmlBean>();
		wr.setIndex(num);
//		//构建查询条件
//		MatchQueryBuilder q1 =new MatchQueryBuilder("title", kw);
//		MatchQueryBuilder q2 =new MatchQueryBuilder("content", kw);
//		
//		//构建一个多条件查询对象
//		BoolQueryBuilder q =new BoolQueryBuilder(); //组合查询条件对象
//		q.should(q1);
//		q.should(q2);
		
//		RangeQueryBuilder q1 =new RangeQueryBuilder("age");
//		q1.from(18);
//		q1.to(40);
		
		MultiMatchQueryBuilder q =new MultiMatchQueryBuilder(kw, new String[]{"title","content"});
		SearchResponse resp=null;
		if(wr.getIndex()==1){
			resp = client.prepareSearch("bjsxt")
					.setTypes("htmlbean")
					.setQuery(q)
					.addHighlightedField("title")
					.addHighlightedField("content")
					.setHighlighterPreTags("<font color=\"red\">")
					.setHighlighterPostTags("</font>")
					.setHighlighterFragmentSize(40)//设置显示结果中一个碎片段的长度
					.setHighlighterNumOfFragments(5)//设置显示结果中每个结果最多显示碎片段,每个碎片段之间用...隔开
					.setFrom(0)
					.setSize(10)
					.execute().actionGet();
			
		}else{
			wr.setTotalCount(count);
			resp = client.prepareSearch("bjsxt")
					.setTypes("htmlbean")
					.setQuery(q)
					.addHighlightedField("title")
					.addHighlightedField("content")
					.setHighlighterPreTags("<font color=\"red\">")
					.setHighlighterPostTags("</font>")
					.setHighlighterFragmentSize(40)
					.setHighlighterNumOfFragments(5)
					.setFrom(wr.getStartRow())
					.setSize(10)
					.execute().actionGet();
		}
		SearchHits hits= resp.getHits();
		wr.setTotalCount((int)hits.getTotalHits());
		
		for(SearchHit hit : hits.getHits()){
			HtmlBean bean =new HtmlBean();
			if(hit.getHighlightFields().get("title")==null){//title中没有包含关键字
				bean.setTitle(hit.getSource().get("title").toString());//获取原来的title(没有高亮的title)
			}else{
				bean.setTitle(hit.getHighlightFields().get("title").getFragments()[0].toString());
			}
			if(hit.getHighlightFields().get("content")==null){//title中没有包含关键字
				bean.setContent(hit.getSource().get("content").toString());//获取原来的title(没有高亮的title)
			}else{
				StringBuilder sb =new StringBuilder();
				for(Text text: hit.getHighlightFields().get("content").getFragments()){
					sb.append(text.toString()+"...");
				}
				bean.setContent(sb.toString());
			}
			
			bean.setUrl("http://"+hit.getSource().get("url").toString());
			wr.setBean(bean);
			
		}
		
		
		return wr;
	}
}

  

posted @ 2018-07-05 12:53  uuhh  阅读(618)  评论(0)    收藏  举报