ES(Elasticsearch):Python 操作

 

ES 创建索引

# coding=utf-8

from __future__ import print_function
from elasticsearch import Elasticsearch

# Connection settings. Credentials are embedded in the host string as
# "user:password@host:port"; the values here are placeholders.
ES_HOSTS = ['elastic:xxxx@es-cn-xxx.elasticsearch.aliyuncs.com:9200']
INDEX_NAME = 'goodlook'
DOC_TYPE = 'post'

es = Elasticsearch(hosts=ES_HOSTS)

# An existing index has to be deleted before it can be re-created with the
# mapping below. ignore=404 keeps the very first run (index not there yet)
# from raising NotFoundError.
res = es.indices.delete(index=INDEX_NAME, ignore=404)

# Every text field uses the same analyzer pair. Index creation fails with
# "analyzer [...] not found" if these analyzers are not available on the
# cluster.
_IK_TEXT_FIELD = {
    "type": "text",
    "analyzer": "ik_max_word",
    "search_analyzer": "ik_smart",
}

# Top-level text fields of a post, and the text fields of each nested item.
_POST_TEXT_FIELDS = (
    "star", "gender", "age", "scene", "season", "style", "color", "content",
)
_ITEM_TEXT_FIELDS = ("category", "color", "attributes")


def _ik_text_properties(field_names):
    """Return a ``properties`` dict mapping each name to its own IK text field."""
    return {name: dict(_IK_TEXT_FIELD) for name in field_names}


_post_properties = _ik_text_properties(_POST_TEXT_FIELDS)
_post_properties["items"] = {
    "type": "nested",
    "properties": _ik_text_properties(_ITEM_TEXT_FIELDS),
}

request_body = {
    "mappings": {
        DOC_TYPE: {
            "properties": _post_properties,
        },
    },
}

# ignore=400 makes the client return the error body instead of raising when
# the create request is rejected (e.g. a mapping that cannot be parsed).
res = es.indices.create(index=INDEX_NAME, ignore=400, body=request_body)
print(" response: {}".format(res))

ES 搜索

# coding=utf-8


from __future__ import print_function
from elasticsearch import Elasticsearch

# Connection settings. Credentials are embedded in the host string as
# "user:password@host:port"; the values here are placeholders.
ES_HOSTS = ['elastic:xxx@es-cn-xxx.elasticsearch.aliyuncs.com:9200']
INDEX_NAME = 'goodlook'
DOC_TYPE = 'post'

es = Elasticsearch(hosts=ES_HOSTS)

# Free-text query: space-separated search terms.
q = "女士 春秋 连衣裙"

# Search the configured index/type (previously hard-coded a second time here)
# and return at most 10 hits. "^N" is a per-field boost.
# NOTE(review): the index-creation script maps "items" as a nested field; a
# top-level multi_match presumably cannot match "items.category" without
# wrapping it in a nested query -- confirm against the cluster.
resp = es.search(
    index=INDEX_NAME,
    doc_type=DOC_TYPE,
    size=10,
    body={
        "query": {
            "multi_match": {
                "query": q,
                "fields": ["gender^50",  "items.category^100", "color^100", "age", "star", "scene"]
            }
        }
    })

print("response: {}".format(resp))
# /usr/bin/python3.6 /root/backend/aaa_xwk/es搜索.py

ES 类实现

#coding:utf8
import os
import time
from os import walk
from datetime import datetime
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
# es版本6.0


class MyElasticSearch(object):
    """Small convenience wrapper around a single Elasticsearch index and type.

    Every method operates on ``self.index_name`` / ``self.index_type`` through
    the client stored in ``self.es``. Most methods print the server response
    rather than returning it.
    """

    def __init__(self, index_name, index_type, ip="127.0.0.1", port=9200):
        """
        :param index_name: name of the index to operate on
        :param index_type: mapping (document) type inside that index
        :param ip: Elasticsearch host
        :param port: Elasticsearch HTTP port
        """
        self.index_name = index_name
        self.index_type = index_type
        self.ip = ip
        self.port = port
        # Connection without username/password.
        # NOTE(review): the client timeout is presumably in seconds, which
        # makes 50000 roughly 14 hours -- confirm this is intended.
        self.es = Elasticsearch([ip], port=self.port, timeout=50000)
        # Connection with username/password:
        # self.es = Elasticsearch([self.ip], http_auth=('elastic', '<password>'), port=self.port)

    def create_index(self):
        """
        Create the index with its mapping unless it already exists.

        Since 6.0 the ``string`` field type is gone, and ``index`` no longer
        accepts ``not_analyzed``; it must be a boolean.

        :return: None (the create response is printed)
        """
        # Nothing to do when the index is already there.
        if self.es.indices.exists(index=self.index_name):
            return

        _index_mappings = {
            "mappings": {
                self.index_type: {
                    "properties": {
                        "title": {
                            "type": "text",
                            "index": True,
                            "analyzer": "ik_max_word",
                            "search_analyzer": "ik_smart",
                        },
                        "date": {
                            "type": "text",
                            "index": True
                        },
                        "keyword": {
                            "type": "text",
                            "index": True
                        },
                        "source": {
                            "type": "text",
                            "index": False
                        },
                        "link": {
                            "type": "text",
                            "index": False
                        }
                    }
                }
            }
        }
        # ignore=400: a rejected request comes back as an error body instead
        # of raising, so the printed response must be checked for "error".
        es_index = self.es.indices.create(index=self.index_name, ignore=400, body=_index_mappings)
        print("ES_index: ", es_index)

    def insert_data(self):
        """
        Store two sample documents one at a time, using ``create``.

        Ids are assigned sequentially starting at 10. ``index`` could be used
        instead of ``create`` (see the commented line below).

        :return: None (each response is printed)
        """
        insert_list = [
            {
                "date": "2017-09-13",
                "source": "慧聪网",
                "link": "http://info.broadcast.hc360.com/2017/09/130859749974.shtml",
                "keyword": "电视",
                "title": "001"
            },
            {
                "date": "2017-09-13",
                "source": "中国文明网",
                "link": "http://www.wenming.cn/xj_pd/yw/201709/t20170913_4421323.shtml",
                "keyword": "电视",
                "title": "002"
            }
        ]
        # enumerate gives each document its own id even if two documents are
        # equal; list.index() would return the first match for both.
        for doc_id, item in enumerate(insert_list, start=10):
            # res = self.es.index(index=self.index_name, doc_type=self.index_type, body=item)
            res = self.es.create(index=self.index_name, doc_type=self.index_type, id=doc_id, body=item)
            print(res)

    def insert_data_by_bulk(self):
        """
        Store four sample documents in one request via the ``bulk`` helper.

        Ids are assigned sequentially starting at 1.

        :return: None (the number of successful actions is printed)
        """
        bulk_list = [
            {"date": "2017-09-13",
             "source": "慧聪网",
             "link": "http://info.broadcast.hc360.com/2017/09/130859749974.shtml",
             "keyword": "电视",
             "title": "付费 电视 行业面临的转型和挑战"
             },
            {"date": "2017-09-13",
             "source": "中国文明网",
             "link": "http://www.wenming.cn/xj_pd/yw/201709/t20170913_4421323.shtml",
             "keyword": "电视",
             "title": "电视 专题片《巡视利剑》广获好评:铁腕反腐凝聚党心民心"
             },
            {"date": "2017-09-13",
             "source": "人民电视",
             "link": "http://tv.people.com.cn/BIG5/n1/2017/0913/c67816-29533981.html",
             "keyword": "电视",
             "title": "中国第21批赴刚果(金)维和部隊启程--人民 电视 --人民网"
             },
            {"date": "2017-09-13",
             "source": "站长之家",
             "link": "http://www.chinaz.com/news/2017/0913/804263.shtml",
             "keyword": "电视",
             "title": "电视 盒子 哪个牌子好? 吐血奉献三大选购秘笈"
             }
        ]

        # Only these fields are copied into each document's _source.
        source_fields = ("date", "source", "link", "keyword", "title")
        actions = [
            {
                "_index": self.index_name,
                "_type": self.index_type,
                "_id": doc_id,
                "_source": {field: line[field] for field in source_fields},
            }
            for doc_id, line in enumerate(bulk_list, start=1)
        ]
        # Send everything in one batch.
        success, _ = bulk(self.es, actions, index=self.index_name, raise_on_error=True)
        print('Performed %d actions' % success)

    def get_data_by_id(self, id):
        """
        Fetch one document with ``es.get()``.

        The document body sits directly under ``_source``; there is no outer
        ``['hits']['hits']`` layer as with a search.

        :param id: document id
        :return: tuple ``(full response, response['_source'])``
        """
        res = self.es.get(index=self.index_name, doc_type=self.index_type, id=id)
        print(res)
        print(res['_source'])
        return res, res['_source']

    def get_data_by_query(self):
        """
        Search with ``es.search()`` for documents whose ``keyword`` matches "电视".

        Each hit's document body is under ``['hits']['hits'][i]['_source']``.

        :return: tuple ``(full response, response['hits']['hits'])``
        """
        # doc = {'query': {'match_all': {}}}
        doc = {
            "query": {
                "match": {
                    "keyword": "电视"
                }
            }
        }
        _searched = self.es.search(index=self.index_name, doc_type=self.index_type, body=doc)

        hits = _searched['hits']['hits']
        for hit in hits:
            source = hit['_source']
            print(source['date'], source['source'], source['link'], source['keyword'], source['title'])

        return _searched, hits

    def update_data(self, id):
        """
        Partially update one document, setting its ``title`` to "003".

        :param id: id of the document to update
        :return: None (the response is printed)
        """
        # Target this instance's index/type rather than a hard-coded one.
        res = self.es.update(
            index=self.index_name,
            doc_type=self.index_type,
            id=id,
            body={'doc': {'title': "003"}},
        )
        print(res)

    def update_data_by_bulk(self):
        """
        Partially update the documents with ids 0, 10 and 11 in one bulk
        request, setting their ``keyword`` to "网络".

        :return: None (the number of successful actions is printed)
        """
        # A fully spelled-out action looks like this; here _index and _type
        # are supplied once through the bulk() keyword arguments instead.
        # {
        #     '_op_type': 'update',
        #     '_index': 'goodlook',
        #     '_type': 'post',
        #     '_id': 42,
        #     'doc': {'question': 'The life, universe and everything.'}
        # }
        actions = [
            {'_op_type': 'update', '_id': doc_id, 'doc': {'keyword': '网络'}}
            for doc_id in (0, 10, 11)
        ]

        success, _ = bulk(self.es, actions, index=self.index_name, doc_type=self.index_type, raise_on_error=True)
        print('Performed %d actions' % success)

    def delete_one_data(self, id):
        """
        Delete a single document from the index.

        :param id: id of the document to delete
        :return: None (the response is printed)
        """
        res = self.es.delete(index=self.index_name, doc_type=self.index_type, id=id)
        print(res)

    def delete_all(self):
        """
        Delete the documents with ids 1 through 5 in one bulk request.

        Despite the name, this removes only those five ids; it does not empty
        or drop the index (see ``delete_index`` for that).

        :return: None (the number of successful actions is printed)
        """
        # A fully spelled-out action looks like this; here _index and _type
        # are supplied once through the bulk() keyword arguments instead.
        # {
        #     '_op_type': 'delete',
        #     '_index': 'goodlook',
        #     '_type': 'post',
        #     '_id': 42,
        # }
        actions = [{'_op_type': 'delete', '_id': doc_id} for doc_id in range(1, 6)]

        success, _ = bulk(self.es, actions, index=self.index_name, doc_type=self.index_type, raise_on_error=True)
        print('Performed %d actions' % success)

    def delete_index(self):
        """
        Delete the whole index.

        :return: None
        """
        self.es.indices.delete(index=self.index_name)


# Wrapper bound to the "goodlook" index / "post" type on a local node.
es_store = MyElasticSearch(
    index_name="goodlook",
    index_type="post",
    ip="localhost",
    port=9200,
)

# Create the index once, then comment this out; the index must not be
# re-created repeatedly.
res = es_store.create_index()
print(res)
# ES_index:  {'error': {'root_cause': [{'type': 'mapper_parsing_exception', 'reason': 'analyzer [ik_smart] not found for field [title]'}], 'type': 'mapper_parsing_exception', 'reason': 'Failed to parse mapping [post]: analyzer [ik_smart] not found for field [title]', 'caused_by': {'type': 'mapper_parsing_exception', 'reason': 'analyzer [ik_smart] not found for field [title]'}}, 'status': 400}
# None
# This error showed up locally as well; it turned out I had it disabled.


# Delete the index
# es_store.delete_index()


# Insert documents one at a time
# insert_data = es_store.insert_data()
# print(insert_data)

# Insert documents in bulk
# insert_data = es_store.insert_data_by_bulk()
# print(insert_data)

# Fetch one document by id
# data, data01 = es_store.get_data_by_id(4)
# print(data01)
# {'date': '2017-09-13',
#  'source': '站长之家',
#  'link': 'http://www.chinaz.com/news/2017/0913/804263.shtml',
#  'keyword': '电视',
#  'title': '电视 盒子 哪个牌子好? 吐血奉献三大选购秘笈'
#  }

# Fetch documents by query
# data, data1 = es_store.get_data_by_query()

# Update one document
# data = es_store.update_data(11)
# print(data)

# Update documents in bulk
# data = es_store.update_data_by_bulk()
# print(data)

# Delete one document
# data = es_store.delete_one_data(11)
# print(data)

# Delete documents in bulk
# data = es_store.delete_all()
# print(data)

# Be clear about the index structure, the document structure, and the shape
# of the data each call returns.

 

posted @ 2019-04-13 15:52  Adamanter  阅读(655)  评论(0)    收藏  举报