es学习
1.官网直接安装
官网下载:https://www.elastic.co/cn/downloads/elasticsearch
mac安装es:
下载完成后,打开bin文件夹下的elasticsearch文件,注意jdk版本要正确。因为我电脑有两个jdk版本,默认是jdk7.

执行vim ~/.bash_profile,将JAVA_HOME修改为正确的jdk版本

输入i进入编辑(insert)模式,修改完成后按esc退出编辑模式,再输入:wq保存并退出vim

安装成功,访问http://localhost:9200/

2.docker安装
mac安装docker:https://www.runoob.com/docker/macos-docker-install.html
3.安装kibana
下载地址:https://www.elastic.co/cn/downloads/kibana
安装:打开bin文件夹,打开文件kibana
安装遇到错误:
Error: getaddrinfo ENOTFOUND localhost,是由于localhost没有绑定到127.0.0.1。解决办法:在/etc/hosts文件中添加一行 127.0.0.1 localhost
启动后,在浏览器上打开 http://localhost:5601/

如果想修改Kibana连接的Elasticsearch地址(elasticsearch.hosts),或是Kibana自身的端口5601(server.port),可以在Kibana目录下的config下面的kibana.yml文件中进行修改。
4.es分词器安装
安装指南:https://github.com/medcl/elasticsearch-analysis-ik
两种安装方式:
a.下载解压后安装,下载地址:https://github.com/medcl/elasticsearch-analysis-ik/releases
b.直接命令安装:./bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.9.0/elasticsearch-analysis-ik-7.9.0.zip
安装完重启es(注意:ik插件版本必须与es版本一致)
5.term vectors
官网文档:https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-termvectors.html#docs-termvectors-api-term-info
PUT /my-index-000001
{ "mappings": {
"properties": {
"text": {
"type": "text",
"term_vector": "with_positions_offsets_payloads",
"store" : true,
"analyzer" : "fulltext_analyzer"
},
"fullname": {
"type": "text",
"term_vector": "with_positions_offsets_payloads",
"analyzer" : "fulltext_analyzer"
}
}
},
"settings" : {
"index" : {
"number_of_shards" : 1,
"number_of_replicas" : 0
},
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"type_as_payload"
]
}
}
}
}
}
PUT /my-index-000001/_doc/1
{
"fullname" : "John Doe",
"text" : "test test test "
}
PUT /my-index-000001/_doc/2?refresh=wait_for
{
"fullname" : "Jane Doe",
"text" : "Another test ..."
}
PUT /my-index-000001/_doc/3?refresh=wait_for
{
"fullname" : "huyanxia liangming",
"text" : "test Another baby ..."
}
GET /my-index-000001/_termvectors/1
{
  "fields" : ["text"],
  "offsets" : true,
  "payloads" : true,
  "positions" : true,
  "term_statistics" : true,
  "field_statistics" : true
}
GET /my-index-000001/_termvectors
{
"doc" : {
"fullname" : "John Doe diannao",
"text" : "test test test"
},
"filter": {
"max_num_terms": 3,
"min_term_freq": 1,
"min_doc_freq": 1
}
}
6.聚合计算,es版本7.9.1
PUT /user_profiles1
{
"settings": {
"index": {
"number_of_shards": "32",
"number_of_replicas": "1"
}
},
"mappings": {
"properties": {
"type": {
"type": "keyword"
},
"user_id": {
"type": "keyword"
},
"item_id": {
"type": "keyword"
},
"boost": {
"type": "double"
},
"created": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
},
"keywords": {
"type": "nested",
"properties": {
"word": {
"type": "keyword"
},
"weight": {
"type": "double"
}
}
}
}
}
}
PUT /user_profiles1/_doc/1_1_1001
{
"type": "1",
"user_id": "1",
"item_id": "1001",
"factor": 1.2,
"created" : "2020-09-07 14:54:37",
"keywords": [
{
"word": "中国",
"weight": 3.2
},
{
"word": "美国",
"weight": 1.4
}
]
}
PUT /user_profiles1/_doc/1_1_1002
{
"type": "1",
"user_id": "1",
"item_id": "1002",
"factor": 1.2,
"created" : "2020-09-07 14:54:37",
"keywords": [
{
"word": "中国辅导费",
"weight": 6.2
},
{
"word": "美国当时的",
"weight": 1.9
}
]
}
POST /user_profiles1/_search
{
"query": {
"bool": {
"must": [{
"terms": {
"type": [
"1"
]
}
},
{
"term": {
"user_id": {
"value": "1"
}
}
},
{
"range": {
"created": {
"gte": "2020-09-07 14:54:37"
}
}
}
]
}
},
"size": 0,
"aggs": {
"agg_keywords": {
"nested": {
"path": "keywords"
},
"aggs": {
"agg_word": {
"terms": {
"field": "keywords.word",
"order": {
"agg_score": "desc"
},
"size": 2 //决定返回大小
},
"aggs": {
"agg_score": {
"sum": {
"field": "keywords.weight"
}
}
}
}
}
}
}
}
7.从本地读取文件
8.termVector es2.1
// Option 1: build and execute the term-vectors request through the client's fluent builder.
TermVectorsResponse termVectorResponse = ElasticSearchUtils.getEsClient()
        .prepareTermVectors()
        .setIndex("knowledge_items")
        .setType("knowledge_items")
        .setId(itemId)
        .setSelectedFields("content")
        // Term statistics supply the per-term docFreq used by the idf computation.
        .setTermStatistics(true)
        // Field statistics must be enabled: Terms.getDocCount() (used by the tf-idf
        // traversal of this response) is only populated when they are requested.
        .setFieldStatistics(true)
        .setOffsets(false)
        .setPayloads(false)
        .setPositions(false)
        .execute()
        .actionGet();
//第二种
TermVectorsRequest termVectorsRequest = new TermVectorsRequest();
//设置参数 ElasticSearchUtils.getEsClient().termVectors(termVectorsRequest).actionGet();
结果json化输出
// Serialize the term-vectors response to JSON and print it for inspection.
try {
    XContentBuilder builder = XContentFactory.jsonBuilder();
    // toXContent emits fields only, so wrap them in an enclosing JSON object.
    builder.startObject();
    termVectorResponse.toXContent(builder, ToXContent.EMPTY_PARAMS);
    builder.endObject();
    System.out.println("json termVectorResponse:" + builder.string());
} catch (IOException e) {
    // Log with the stack trace instead of printStackTrace(), matching the traversal snippet.
    LOG.error("es termVectorResponse json output failed:", e);
}
结果遍历
// Walk every field of the term-vectors response, collect per-term frequencies and
// turn them into boosted TF-IDF keyword weights appended to keyWordEntities.
try {
    Fields fields = termVectorResponse.getFields();
    for (String field : fields) {
        Terms terms = fields.terms(field);
        // Document count of the field (a field_statistics value of the response).
        int docCount = terms.getDocCount();
        TermsEnum termsEnum = terms.iterator();
        int currentTotalTermFreq = 0;
        List<TermInfoEntity> termInfoEntities = new ArrayList<>();
        BytesRef term;
        // next() returns null once the enum is exhausted, so the term is never null below.
        while ((term = termsEnum.next()) != null) {
            String termName = term.utf8ToString();
            // Skip purely numeric terms and single-character terms.
            if (NumUtils.isNum(termName) || termName.length() == 1) {
                LOG.info("termName filter:{}", termName);
                continue;
            }
            int docFreq = termsEnum.docFreq();
            int termFreq = termsEnum.postings(null, PostingsEnum.FREQS).freq();
            currentTotalTermFreq += termFreq;
            termInfoEntities.add(new TermInfoEntity(termName, termFreq, docFreq));
        }
        // Lambdas may only capture effectively final locals, hence the copies.
        int finalCurrentTotalTermFreq = currentTotalTermFreq;
        double finalItemBoost = itemBoost;
        // Compute the tf-idf of every retained term.
        termInfoEntities.forEach(termInfoEntity -> {
            double tf = (double) termInfoEntity.getTermFreq() / (double) finalCurrentTotalTermFreq;
            // Divide as doubles; integer division would truncate the ratio before log10.
            double idf = Math.log10((double) docCount / termInfoEntity.getDocFreq()) + 1;
            double tfIDf = NumUtils.doubleValueScale(6, tf * idf);
            KeyWordEntity keyWordEntity = new KeyWordEntity(
                    termInfoEntity.getTermName(),
                    tfIDf * userActionTypeEnum.getBoost() * finalItemBoost);
            keyWordEntities.add(keyWordEntity);
        });
    }
} catch (IOException e) {
    LOG.error("es termVectorResponse 遍历失败:", e);
}
9. es6 报错解决
原来代码:es2中不报错
setSource(JSON.toJSONStringWithDateFormat(sendMsgRecordEntity,JSON.DEFFAULT_DATE_FORMAT))
es6中报错如下:The number of object passed must be even but was [1]。原因:es6中已移除setSource(String),单个字符串会匹配到setSource(Object... source),该方法要求参数为成对的key/value
setSource加上一个参数:XContentType.JSON
setSource(JSON.toJSONStringWithDateFormat(sendMsgRecordEntity,JSON.DEFFAULT_DATE_FORMAT), XContentType.JSON)
浙公网安备 33010602011771号