Elasticsearch-py:Elasticsearch在Python中的应用
本文介绍如何使用 elasticsearch-py 客户端在 Python 中操作 Elasticsearch。通过 Elasticsearch,我们可以轻松实现复杂的全文检索、数据分析等功能。
1. 环境准备
# Install the elasticsearch package
pip install elasticsearch
2. 基础连接设置
from elasticsearch import Elasticsearch

# Create the ES client connection.
# NOTE(review): host dicts without a 'scheme' key are presumably only accepted
# by 7.x clients — confirm against the installed elasticsearch-py version.
es = Elasticsearch([{'host': 'localhost', 'port': 9200}])

# Check the connection status
if es.ping():
    print("连接成功!")
else:
    print("连接失败!")
3. 创建索引和文档
# Index definition: shard/replica settings plus the field mappings
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 1
    },
    "mappings": {
        "properties": {
            "title": {"type": "text"},
            "content": {"type": "text"},
            "tags": {"type": "keyword"}
        }
    }
}

# Create the index named 'blog_posts' only when it does not exist yet.
# Elasticsearch rejects creating an index that already exists, which would
# make this script fail on every run after the first one.
if not es.indices.exists(index='blog_posts'):
    es.indices.create(index='blog_posts', body=index_settings)
# Document to add
doc = {
    'title': 'Python编程入门',
    'content': '这是一篇关于Python基础的博客文章',
    'tags': ['python', '编程', '教程']
}

# Insert the document. refresh='wait_for' makes the call return only once the
# document is visible to search; without it, a search issued immediately
# afterwards may not find the document yet.
es.index(index='blog_posts', body=doc, refresh='wait_for')
说明:在上面的映射(mappings)中,text 类型的字段会进行分词,适合全文搜索;
keyword 类型的字段不分词,适合精确匹配和聚合分析。
4. 基本搜索操作
# Simple search
def simple_search(keyword):
    """Search the 'blog_posts' index for ``keyword`` in title and content.

    Sends a ``multi_match`` query over the ``title`` and ``content`` fields
    and returns the list stored under ``['hits']['hits']`` of the response.
    """
    match_clause = {"query": keyword, "fields": ["title", "content"]}
    response = es.search(
        index='blog_posts',
        body={"query": {"multi_match": match_clause}},
    )
    return response['hits']['hits']
# Try the search out and print score and title of every hit
results = simple_search("Python")
for hit in results:
    score = hit['_score']
    print(f"得分: {score}")
    title = hit['_source']['title']
    print(f"标题: {title}")
5. 高级搜索示例
def advanced_search(keyword, tags=None, min_score=0.5):
    """Run a boosted keyword search on 'blog_posts', optionally filtered by tags.

    Args:
        keyword: Text matched against the ``title`` and ``content`` fields.
        tags: Optional list of values for a ``terms`` filter on ``tags``;
            ``None`` sends an empty filter list.
        min_score: Value sent as the request's ``min_score``.

    Returns:
        The response returned by ``es.search``.
    """
    if tags is None:
        tag_filters = []
    else:
        tag_filters = [{"terms": {"tags": tags}}]

    keyword_clause = {
        "multi_match": {
            "query": keyword,
            "fields": ["title^2", "content"],  # title field weight is doubled
        }
    }
    request_body = {
        "query": {"bool": {"must": [keyword_clause], "filter": tag_filters}},
        "min_score": min_score,
    }
    return es.search(index='blog_posts', body=request_body)
# Search for documents that carry a specific tag
results = advanced_search("Python", tags=['教程'])
6. 批量操作示例
from elasticsearch.helpers import bulk
def bulk_index_documents(documents):
    """Index ``documents`` into 'blog_posts' via the bulk helper and print counts.

    Each document becomes one bulk action whose ``_source`` is the document
    itself. The function prints the success count and the number of entries
    in the second value returned by ``bulk``.

    NOTE(review): the helper presumably raises on indexing errors under its
    default settings, in which case the failure count printed here would
    never be non-zero — confirm against the ``bulk`` helper documentation.
    """
    actions = []
    for document in documents:
        actions.append({"_index": "blog_posts", "_source": document})

    indexed_count, errors = bulk(es, actions)
    print(f"成功索引: {indexed_count}条")
    print(f"失败数量: {len(errors)}条")
# Add several documents in one bulk request
sample_docs = [
    {
        "title": "ES入门",
        "content": "ES基础教程",
        "tags": ["elasticsearch"],
    },
    {
        "title": "搜索优化",
        "content": "提高搜索质量",
        "tags": ["搜索", "优化"],
    },
]
bulk_index_documents(sample_docs)
7. 聚合分析示例
def analyze_tags():
    """Return the tag buckets of a terms aggregation over 'blog_posts'.

    Runs a ``terms`` aggregation named ``popular_tags`` on the ``tags`` field,
    limited to 10 buckets.

    Returns:
        The ``buckets`` list of the ``popular_tags`` aggregation from the
        search response.
    """
    query = {
        # Only the aggregation result is read below, so do not fetch any
        # search hits alongside it.
        "size": 0,
        "aggs": {
            "popular_tags": {
                "terms": {
                    "field": "tags",
                    "size": 10
                }
            }
        }
    }
    result = es.search(index='blog_posts', body=query)
    return result['aggregations']['popular_tags']['buckets']
# Fetch the most popular tags and print each one with its document count
popular_tags = analyze_tags()
for tag in popular_tags:
    name, count = tag['key'], tag['doc_count']
    print(f"标签: {name}, 数量: {count}")

浙公网安备 33010602011771号