Elasticsearch-py:Elasticsearch在Python中的应用

本文介绍Elasticsearch在Python中的应用。通过官方Python客户端elasticsearch-py,我们可以轻松实现复杂的全文检索、数据分析等功能。

1. 环境准备


# 安装elasticsearch包
pip install elasticsearch

2. 基础连接设置


from elasticsearch import Elasticsearch
# Create the ES client. A full URL (scheme included) is accepted by both the
# 7.x and 8.x clients, whereas the 8.x client rejects a host dict that has no
# 'scheme' entry.
es = Elasticsearch("http://localhost:9200")
# Report whether the node answers a ping.
if es.ping():
    print("连接成功!")
else:
    print("连接失败!")

3. 创建索引和文档


# Index definition: shard/replica counts plus an explicit field mapping.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 1
    },
    "mappings": {
        "properties": {
            "title": {"type": "text"},
            "content": {"type": "text"},
            "tags": {"type": "keyword"}
        }
    }
}
# Create the 'blog_posts' index only when it is missing, so that re-running
# the script does not fail because the index already exists.
if not es.indices.exists(index='blog_posts'):
    es.indices.create(index='blog_posts', body=index_settings)
# Sample blog post to store in the 'blog_posts' index.
doc = {
    'title': 'Python编程入门',
    'content': '这是一篇关于Python基础的博客文章',
    'tags': ['python', '编程', '教程']
}
# Index the document. refresh='wait_for' makes the call return only once the
# document is visible to searches, so a search issued right afterwards can
# find it instead of racing the periodic index refresh.
es.index(index='blog_posts', body=doc, refresh='wait_for')
说明:text类型会进行分词,适合全文搜索;keyword类型不分词,适合精确匹配和聚合分析。

4. 基本搜索操作


# 简单搜索
def simple_search(keyword):
    """Run a multi_match search for *keyword* over ``title`` and ``content``.

    The request goes to the 'blog_posts' index through the module-level
    ``es`` client.

    Returns:
        The list stored under ``hits.hits`` in the search response.
    """
    match_clause = {"query": keyword, "fields": ["title", "content"]}
    request_body = {"query": {"multi_match": match_clause}}
    response = es.search(index='blog_posts', body=request_body)
    return response['hits']['hits']
# Try the search helper and print the score and title of every hit.
results = simple_search("Python")
for hit in results:
    score = hit['_score']
    print(f"得分: {score}")
    title = hit['_source']['title']
    print(f"标题: {title}")

5. 高级搜索示例


def advanced_search(keyword, tags=None, min_score=0.5):
    """Search 'blog_posts' for *keyword*, optionally restricted by tags.

    Args:
        keyword: Text matched against ``title`` (boosted x2) and ``content``.
        tags: Optional tag, or iterable of tags, used in a ``terms`` filter
            on the ``tags`` field. ``None`` or an empty collection disables
            the filter.
        min_score: Value sent as the request's ``min_score``.

    Returns:
        The response returned by ``es.search``.
    """
    if tags is None:
        tag_values = []
    elif isinstance(tags, str):
        # A bare string would otherwise be sent as a non-array `terms` value.
        tag_values = [tags]
    else:
        # Materialise tuples/sets/generators into a JSON-serialisable list.
        tag_values = list(tags)

    # A `terms` filter with no values matches no documents, so an empty tag
    # collection must drop the filter rather than send an empty list.
    filters = [{"terms": {"tags": tag_values}}] if tag_values else []

    query = {
        "query": {
            "bool": {
                "must": [
                    {
                        "multi_match": {
                            "query": keyword,
                            "fields": ["title^2", "content"]  # title counts double
                        }
                    }
                ],
                "filter": filters
            }
        },
        "min_score": min_score
    }
    return es.search(index='blog_posts', body=query)
# Search for documents that carry a specific tag.
results = advanced_search(keyword="Python", tags=['教程'])

6. 批量操作示例


from elasticsearch.helpers import bulk
def bulk_index_documents(documents):
    """Index *documents* into 'blog_posts' through the bulk helper.

    Args:
        documents: Iterable of document bodies; each one becomes the
            ``_source`` of a bulk action.

    Returns:
        Tuple ``(success, failed)``: the number of successfully indexed
        documents and the list of per-document errors reported by ``bulk``.
    """
    actions = [
        {"_index": "blog_posts", "_source": document}
        for document in documents
    ]
    # With the helper's default raise_on_error=True any failed document makes
    # bulk() raise before the summary is printed, so the failure count below
    # could only ever be 0. Collect the errors instead and report them.
    success, failed = bulk(es, actions, raise_on_error=False)
    print(f"成功索引: {success}条")
    print(f"失败数量: {len(failed)}条")
    return success, failed
# Two sample posts to push through the bulk helper.
sample_docs = [
    {
        "title": "ES入门",
        "content": "ES基础教程",
        "tags": ["elasticsearch"],
    },
    {
        "title": "搜索优化",
        "content": "提高搜索质量",
        "tags": ["搜索", "优化"],
    },
]
bulk_index_documents(sample_docs)

7. 聚合分析示例


def analyze_tags():
    """Return the buckets of a terms aggregation over the ``tags`` field.

    The aggregation is limited to 10 buckets and runs against the
    'blog_posts' index through the module-level ``es`` client.

    Returns:
        The ``buckets`` list of the ``popular_tags`` aggregation.
    """
    query = {
        # Only the aggregation result is used, so skip fetching search hits.
        "size": 0,
        "aggs": {
            "popular_tags": {
                "terms": {
                    "field": "tags",
                    "size": 10
                }
            }
        }
    }
    result = es.search(index='blog_posts', body=query)
    return result['aggregations']['popular_tags']['buckets']
# Fetch the most popular tags and print each one with its document count.
popular_tags = analyze_tags()
for bucket in popular_tags:
    name, count = bucket['key'], bucket['doc_count']
    print(f"标签: {name}, 数量: {count}")

来源:https://www.iwmyx.cn/pythonssyqelasticsea.html

posted @ 2025-01-02 17:08  武穆逸仙  阅读(82)  评论(0)    收藏  举报

为天地立心 为生民立命 为往圣继绝学 为万世开太平