mongodb聚合操作$使用例子
$操作符使用
$操作符一般用作
== index a ==
1. $addField 增加一个字段
使用场景一:查询的结果中增加字段
数据类型如下
{
"_id" : ObjectId("610cd5a0702aac3e62a77a13"),
"name" : "zhang",
"intro" : "a"
}
/* 2 */
{
"_id" : ObjectId("610cd5a4702aac3e62a77a14"),
"name" : "zhang",
"intro" : "b"
}
/* 3 */
{
"_id" : ObjectId("610cd5a6702aac3e62a77a15"),
"name" : "zhang",
"intro" : "b"
}
demo1:返回的结果中增加一个字段,保存执行的值
match_dict = {"$match":{"name": "zhang"}}
add_dict = {"$addFields": {"intro_list":"$intro"}}
ret = db["test"].aggregate([match_dict,add_dict])
print(list(ret))
# 结果
[{'_id': ObjectId('610cd5a0702aac3e62a77a13'), 'name': 'zhang', 'intro': 'a', 'intro_list': 'a'}, {'_id': ObjectId('610cd5a4702aac3e62a77a14'), 'name': 'zhang', 'intro': 'b', 'intro_list': 'b'}, {'_id': ObjectId('610cd5a6702aac3e62a77a15'), 'name': 'zhang', 'intro': 'b', 'intro_list': 'b'}]
demo2:将一个值类型转换后,保存到增加的字段
match_dict = {"$match": {"name": "zhang"}}
# 将查询结果的值进行转换,保存到一个指定的字段中,多用于lookup时候。
add_dict = {"$addFields": {"str_id": {"$toString":"$_id"}}}
ret = db["test"].aggregate([match_dict, add_dict])
# 结果
[{'_id': ObjectId('610cd5a0702aac3e62a77a13'), 'name': 'zhang', 'intro': 'a', 'str_id': '610cd5a0702aac3e62a77a13'}, {'_id': ObjectId('610cd5a4702aac3e62a77a14'), 'name': 'zhang', 'intro': 'b', 'str_id': '610cd5a4702aac3e62a77a14'}, {'_id': ObjectId('610cd5a6702aac3e62a77a15'), 'name': 'zhang', 'intro': 'b', 'str_id': '610cd5a6702aac3e62a77a15'}]
ObjectId转换为str类型
# 以前都是查出结果后,循环str将id从Object转换为str,现在可以一步到位
add_dict = {"$addFields": {"_id": {"$toString":"$_id"}}}
# 结果为
[{'_id': '610cd5a0702aac3e62a77a13', 'name': 'zhang', 'intro': 'a'}, {'_id': '610cd5a4702aac3e62a77a14', 'name': 'zhang', 'intro': 'b'}, {'_id': '610cd5a6702aac3e62a77a15', 'name': 'zhang', 'intro': 'b'}]
数据类型如下
/* 1 */
{
"_id" : ObjectId("610cd5a0702aac3e62a77a13"),
"name" : "china",
"city_gdp" : [
{
"city" : "beijing",
"gdp" : 100
},
{
"city" : "shanghai",
"gdp" : 200
},
{
"city" : "tianjin",
"gdp" : 300
},
{
"city" : "chongqing",
"gdp" : 400
}
]
}
demo3:求gdp的总和和平均值
match_dict = {"$match": {"name": "china"}}
# 增加两个字段:保存总和和平均值
add_dict = {"$addFields": {"total": {"$sum":"$city_gdp.gdp"},"avg":{"$avg":"$city_gdp.gdp"}}}
ret = db["test"].aggregate([match_dict, add_dict])
print(list(ret))
# 结果
[{'_id': ObjectId('610cd5a0702aac3e62a77a13'), 'name': 'china', 'city_gdp': [{'city': 'beijing', 'gdp': 100}, {'city': 'shanghai', 'gdp': 200}, {'city': 'tianjin', 'gdp': 300}, {'city': 'chongqing', 'gdp': 400}], 'total': 1000, 'avg': 250.0}]
使用场景二:给原始数据增加字段---类似于修改表结构
数据结构如下
/* 2 */
{
"_id" : ObjectId("610e97f48e28e168e81a2e62"),
"company" : "apple",
"computer" : 15000,
"iphone" : 6000
}
/* 3 */
{
"_id" : ObjectId("610e97fe8e28e168e81a2e87"),
"company" : "huawei",
"computer" : 10000,
"iphone" : 8000
}
每条数据增加一个字段,名字为pad,默认初始值为3000
update_dict = {"$addFields": {"pad": 3000}}
# 注意:必须使用列表包裹更新条件,否则报错
ret = db["test"].update_many({}, [update_dict])
# 结果
/* 2 */
{
"_id" : ObjectId("610e97f48e28e168e81a2e62"),
"company" : "apple",
"computer" : 15000,
"iphone" : 6000,
"pad" : 3000
}
/* 3 */
{
"_id" : ObjectId("610e97fe8e28e168e81a2e87"),
"company" : "huawei",
"computer" : 10000,
"iphone" : 8000,
"pad" : 3000
}
2.$addToSet
使用方式一:作为列表修改器,一般和update方法搭配使用。
数据类型如下
{
"_id" : ObjectId("610e91708e28e168e81a1c2f"),
"name" : "xian",
"foods" : [
"肉夹馍"
]
}
给foods列表中添加数据,达到去重效果
ret = db["test"].update_one(filter={"name": "xian"}, update={"$addToSet": {"foods": "糖醋面皮"}})
# 结果---插入相同的值会直接去重,但是上面的写法缺点是每次只能添加一个元素
{
"_id" : ObjectId("610e91708e28e168e81a1c2f"),
"name" : "xian",
"foods" : [
"肉夹馍",
"糖醋面皮"
]
}
# 添加一组元素 --- 配合$each使用: $each配置$addToSet使用,给数组中添加多个元素
ret = db["test"].update_one(filter={"name": "xian"}, update={"$addToSet": {"foods": {"$each":["麻辣烫夹馍","东关烧烤"]}}})
# 结果
{
"_id" : ObjectId("610e91708e28e168e81a1c2f"),
"name" : "xian",
"foods" : [
"肉夹馍",
"糖醋面皮",
"麻辣烫夹馍",
"东关烧烤"
]
}
使用方式二:去重的话,一般和group搭配使用
数据类型如下
{
"_id" : ObjectId("610e8d0d8e28e168e81a1009"),
"name" : "zhang",
"intro" : "a"
}
/* 2 */
{
"_id" : ObjectId("610e8d148e28e168e81a1023"),
"name" : "zhang",
"intro" : "a"
}
/* 3 */
{
"_id" : ObjectId("610e8d1b8e28e168e81a103b"),
"name" : "zhang",
"intro" : "b"
}
查询intro值并去重
# 需要说明的是,$addToSet去重一般会和分组group一起使用
match_dict = {"$match": {"name": "zhang"}}
group_dict = {"$group": {"_id": None, "intro_list": {"$addToSet": "$intro"}}}
# 结果
[{'_id': None, 'intro_list': ['b', 'a']}]
3. $and
作用:逻辑与
4. $avg
作用:求平均
5. $add
作用:数学运算
数据类型如下
/* 2 */
{
"_id" : ObjectId("610e97f48e28e168e81a2e62"),
"company" : "apple",
"computer" : 15000,
"iphone" : 6000
}
/* 3 */
{
"_id" : ObjectId("610e97fe8e28e168e81a2e87"),
"company" : "huawei",
"computer" : 10000,
"iphone" : 8000
}
match_dict = {"$match": {"$or": [{"company": "apple"}, {"company": "huawei"}]}}
# project也可以增加字段,$add将不同字段的值进行相加
project_dict = {"$project": {"company": 1, "computer": 1, "iphone": 1, "total": {"$add": ["$computer", "$iphone"]}}}
ret = db["test"].aggregate([match_dict, project_dict])
print(list(ret))
# 结果
[{'_id': ObjectId('610e97f48e28e168e81a2e62'), 'company': 'apple', 'computer': 15000, 'iphone': 6000, 'total': 21000}, {'_id': ObjectId('610e97fe8e28e168e81a2e87'), 'company': 'huawei', 'computer': 10000, 'iphone': 8000, 'total': 18000}]
6. $all
作用:用于匹配数组元素
7. $abs
作用:求绝对值
数据
{
"_id" : ObjectId("610ea57f8e28e168e81a5370"),
"name" : "haerbin",
"temp" : -10
}
返回绝对值后的数据
match_dict = {"$match": {"name": "haerbin"}}
# 利用project可以增加字段的性质,替换_id的值
project = {"$project": {"name": 1, "temp": 1, "abs_temp": {"$abs": "$temp"}, "_id": {"$toString": "$_id"}}}
ret = db["test"].aggregate([match_dict, project])
# 结果
[{'name': 'haerbin', 'temp': -10, 'abs_temp': 10, '_id': '610ea57f8e28e168e81a5370'}]
同样适用于find
# 完美解决find查询后处理_id类型转换的麻烦
ret = db["test"].find_one({"name": "haerbin"},{"name": 1, "temp": 1, "abs_temp": {"$abs": "$temp"}, "_id": {"$toString": "$_id"}})
# 结果:
{'name': 'haerbin', 'temp': -10, 'abs_temp': 10, '_id': '610ea57f8e28e168e81a5370'}
8. $arrayElemAt
作用:返回数组内指定下标的元素
数据类型如下:
{
"_id" : ObjectId("610e91708e28e168e81a1c2f"),
"name" : "xian",
"foods" : [
"肉夹馍",
"糖醋面皮",
"麻辣烫夹馍",
"东关烧烤"
],
"pad" : 3000
}
返回foods下标为0的元素
match_dict = {"$match": {"name": "xian"}}
# 取foods列表中的第一个元素
project = {"$project": {"foods": 1, "pad": 1, "_id": {"$toString": "$_id"}, "foods_num1": {"$arrayElemAt":["$foods", 0]}}}
ret = db["test"].aggregate([match_dict, project])
# 结果
[{'foods': ['肉夹馍', '糖醋面皮', '麻辣烫夹馍', '东关烧烤'], 'pad': 3000, '_id': '610e91708e28e168e81a1c2f', 'foods_num1': '肉夹馍'}]
9. $arrayToObject
作用:将数组内的key-value元素转换为对象
使用场景一:将数据的字段A和字段B的值组成字典格式返回
数据类型如下
{
"_id" : ObjectId("610e97f48e28e168e81a2e62"),
"company" : "apple",
"computer" : 15000,
"iphone" : 6000,
"pad" : 3000
}
/* 3 */
{
"_id" : ObjectId("610e97fe8e28e168e81a2e87"),
"company" : "huawei",
"computer" : 10000,
"iphone" : 8000,
"pad" : 3000
}
输出,{"hawei":8000},{"apple":"6000"}
match_dict = {"$match": {"$or": [{"company": "apple"}, {"company": "huawei"}]}}
# push不能和project搭配使用。
group_dict = {"$group": {"_id": None, "list": {"$push": {"k": "$company", "v": "$pad"}}}}
# project = {"$project": {"computer": 1, "iphone": 1, "_id": {"$toString": "$_id"}, "pad": 1,"list": {"$push": {"k": "$company", "v": "$pad"}}}}
ret = db["test"].aggregate([match_dict, group_dict])
# 结果
[{'_id': None, 'list': [{'k': 'apple', 'v': 3000}, {'k': 'huawei', 'v': 3000}]}]
match_dict = {"$match": {"$or": [{"company": "apple"}, {"company": "huawei"}]}}
group_dict = {"$group": {"_id": None, "list": {"$push": {"k": "$company", "v": "$iphone"}}}}
project = {"$project": {"return_dict": {"$arrayToObject": "$list"}}}
ret = db["test"].aggregate([match_dict, group_dict, project])
# 结果
[{'_id': None, 'return_dict': {'apple': 6000, 'huawei': 8000}}]
arrayToObject能够识别的类型如下
类型一:
[ [ “item”, “abc123”], [ “qty”, 25 ] ]
类型二:
[ { “k”: “item”, “v”: “abc123”}, { “k”: “qty”, “v”: 25 } ]
https://blog.csdn.net/qq_38923792/article/details/110390327
10. $accumulator
作用: 自定义累加器
TODO
11. $allElementsTrue
作用:判断集合中是否包含指定元素,如果数组中有null,0,undefine则返回false,空数组及其他值返回True。
数据类型如下
5 */
{
"_id" : ObjectId("610fbcacaa6673116fbbe472"),
"s_id" : 1,
"responses" : [
true
]
}
/* 6 */
{
"_id" : ObjectId("610fbcacaa6673116fbbe473"),
"s_id" : 2,
"responses" : [
true,
false
]
}
/* 7 */
{
"_id" : ObjectId("610fbcacaa6673116fbbe474"),
"s_id" : 3,
"responses" : []
}
/* 8 */
{
"_id" : ObjectId("610fbcacaa6673116fbbe475"),
"s_id" : 4,
"responses" : [
1,
true,
"seven"
]
}
/* 9 */
{
"_id" : ObjectId("610fbcacaa6673116fbbe476"),
"s_id" : 5,
"responses" : [
0
]
}
/* 10 */
{
"_id" : ObjectId("610fbcacaa6673116fbbe477"),
"s_id" : 6,
"responses" : [
[]
]
}
/* 11 */
{
"_id" : ObjectId("610fbcacaa6673116fbbe478"),
"s_id" : 7,
"responses" : [
[
0
]
]
}
/* 12 */
{
"_id" : ObjectId("610fbcacaa6673116fbbe479"),
"s_id" : 8,
"responses" : [
[
true
]
]
}
/* 13 */
{
"_id" : ObjectId("610fbcacaa6673116fbbe47a"),
"s_id" : 9,
"responses" : [
null
]
}
/* 14 */
{
"_id" : ObjectId("610fbcacaa6673116fbbe47b"),
"s_id" : 10,
"responses" : [
false
]
判断response是否包含None,0,undefine的元素
当前版本:MONGO_VERSION=4.4.6
需要版本:MONGO_VERSION=5.0+
temp_dict = {"$project": {"responses": 1, "isAllTrue": {"$allElementsTrue": ["$responses"] }, "_id": 0 } }
ret = db["test"].aggregate([temp_dict])
报错:pymongo.errors.OperationFailure: $allElementsTrue's argument must be an array, but is missing, full error: {'operationTime': Timestamp(1628423017, 1), 'ok': 0.0, 'errmsg': "$allElementsTrue's argument must be an array, but is missing", 'code': 17040, 'codeName': 'Location17040', '$clusterTime': {'clusterTime': Timestamp(1628423017, 1), 'signature': {'hash': b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'keyId': 0}}}
12. $acos
作用:返回一个值的反余弦值
13. $avg
作用:求平均值
使用场景一:和project搭配使用
数据样式
/* 2 */
{
"_id" : ObjectId("610e97f48e28e168e81a2e62"),
"company" : "apple",
"computer" : 15000,
"iphone" : 6000,
"pad" : 3000
}
/* 3 */
{
"_id" : ObjectId("610e97fe8e28e168e81a2e87"),
"company" : "huawei",
"computer" : 10000,
"iphone" : 8000,
"pad" : 3000
}
求computer + iphone + pad 加起来的平均值
match_dict = {"$match": {"$or": [{"company": "huawei"}, {"company": "apple"}]}}
# 对多个数求平均值
project = {"$project": {"_id": {"$toString": "$_id"}, "avg_three": {"$avg": ["$computer", "$iphone", "$pad"]}}}
ret = db["test"].aggregate([match_dict, project])
print(list(ret))"avg_three": {"$avg": ["$computer", "$iphone", "$ipad"]}}}}
ret = db["test"].aggregate([match_dict, project])
# 结果
[{'_id': '610e97f48e28e168e81a2e62', 'avg_three': 8000.0}, {'_id': '610e97fe8e28e168e81a2e87', 'avg_three': 7000.0}]
使用场景二:和group搭配使用
数据格式如下
{ "_id" : 1, "item" : "abc", "price" : 10, "quantity" : 2, "date" : ISODate("2014-01-01T08:00:00Z") }
{ "_id" : 2, "item" : "jkl", "price" : 20, "quantity" : 1, "date" : ISODate("2014-02-03T09:00:00Z") }
{ "_id" : 3, "item" : "xyz", "price" : 5, "quantity" : 5, "date" : ISODate("2014-02-03T09:05:00Z") }
{ "_id" : 4, "item" : "abc", "price" : 10, "quantity" : 10, "date" : ISODate("2014-02-15T08:00:00Z") }
{ "_id" : 5, "item" : "xyz", "price" : 5, "quantity" : 10, "date" : ISODate("2014-02-15T09:12:00Z") }
官方例子代码
db.sales.aggregate(
[
{
$group:
{
_id: "$item",
avgAmount: { $avg: { $multiply: [ "$price", "$quantity" ] } },
avgQuantity: { $avg: "$quantity" }
}
}
]
)
# 结果
{ "_id" : "xyz", "avgAmount" : 37.5, "avgQuantity" : 7.5 }
{ "_id" : "jkl", "avgAmount" : 20, "avgQuantity" : 1 }
{ "_id" : "abc", "avgAmount" : 60, "avgQuantity" : 6 }
== index b ==
1. $bucket
作用:分段统计数据
使用方式一:单独使用
数据格式如下
// Newly added document
{
"name" : "t1",
"price" : 100.0,
"_id" : ObjectId("6110e53447e9154a103dc257")
}
// Newly added document
{
"name" : "t2",
"price" : 200.0,
"_id" : ObjectId("6110e53847e9154a103dc258")
}
// Newly added document
{
"name" : "t3",
"price" : 300.0,
"_id" : ObjectId("6110e53d47e9154a103dc259")
}
// Newly added document
{
"name" : "t4",
"price" : 400.0,
"_id" : ObjectId("6110e54247e9154a103dc25a")
}
// Newly added document
{
"name" : "t5",
"price" : 500.0,
"_id" : ObjectId("6110e54747e9154a103dc25b")
}
分组 --- 一般group是按照字段进行分组,$bucket可以按照字段值进行判断分组
match = {"$match":{"_id":{"$ne":""}}}
bucket = {
"$bucket": {
# 分组
"groupBy": "$price",
# 边界,200<=x<400 有几个边界条件,就会分几组
"boundaries": [ 200, 400 ],
#符合边界条件的为一组,组名为$price的值, 不符合边界条件的为一组,组名为Other
"default": "Other",
# 输出格式
"output": {
"count": { "$sum": 1 },
"titles" : { "$push": "$name"}
}
}
}
# 结果
[{'_id': 200, 'count': 2, 'titles': ['t2', 't3']},
{'_id': 'Other', 'count': 3, 'titles': ['t1', 't4', 't5']}]
bucket = {
"$bucket": {
# 分组
"groupBy": "$price",
# 边界,t1:200<=x<300, t2:300<=x<500
"boundaries": [ 100, 300, 500 ],
# 符合边界条件的为一组,组名为$price的值, 不符合边界条件的为一组,组名为Other
"default": "Other",
# 输出格式
"output": {
"count": { "$sum": 1 },
"titles" : { "$push": "$name"}
}
}
}
# 结果
[{'_id': 100, 'count': 2, 'titles': ['t1', 't2']}, {'_id': 300, 'count': 2, 'titles': ['t3', 't4']}, {'_id': 'Other', 'count': 1, 'titles': ['t5']}]
使用方式二:搭配facet使用
https://docs.mongodb.com/v4.4/reference/operator/aggregation/bucket/#mongodb-pipeline-pipe.-bucket
2. $bucketauto
作用:根据指定的表达式将传入的文档分类到特定数量的组(称为bucket)中。Bucket边界将自动确定,以便将文档平均分配到指定数量的Bucket中。
3. $binarySize
作用:返回指定字符串或二进制数据的字节大小
例如:可以存储照片的大小
== index c ==
1. $ceil
作用:
数据格式如下
{
"_id" : ObjectId("6110e53447e9154a103dc257"),
"name" : "t1",
"price" : -2.1
}
{
"_id" : ObjectId("6110e53847e9154a103dc258"),
"name" : "t2",
"price" : 0.0
}
{
"_id" : ObjectId("6110e53d47e9154a103dc259"),
"name" : "t3",
"price" : 3.2
}
{
"_id" : ObjectId("6110e54247e9154a103dc25a"),
"name" : "t4",
"price" : -2.6
}
{
"_id" : ObjectId("6110e54747e9154a103dc25b"),
"name" : "t5",
"price" : 1.0
}
// Newly added document
{
"name" : "t6",
"price" : 3.6,
"_id" : ObjectId("6114be69605d7c02aa34eac8")
}
代码
match = {"$match":{"_id":{"$ne":""}}}
project = {
"$project": { "price":1,"name":1,"new_price":{"$ceil":"$price"}}
}
ret= db["test"]["test"].aggregate([match,project])
# 结果:0还是0,负数返回比自己大的最近负整数,正数返回比自己大的最近正整数
[{'_id': ObjectId('6110e53447e9154a103dc257'), 'name': 't1', 'price': -2.1, 'new_price': -2.0},
{'_id': ObjectId('6110e53847e9154a103dc258'), 'name': 't2', 'price': 0.0, 'new_price': 0.0},
{'_id': ObjectId('6110e53d47e9154a103dc259'), 'name': 't3', 'price': 3.2, 'new_price': 4.0},
{'_id': ObjectId('6110e54247e9154a103dc25a'), 'name': 't4', 'price': -2.6, 'new_price': -2.0},
{'_id': ObjectId('6110e54747e9154a103dc25b'), 'name': 't5', 'price': 1.0, 'new_price': 1.0},
{'_id': ObjectId('6114be69605d7c02aa34eac8'), 'name': 't6', 'price': 3.6, 'new_price': 4.0}]
print(list(ret))
2. $cmp
作用:比较两个数的大小
数据格式如下:
{
"_id" : ObjectId("6110e53447e9154a103dc257"),
"name" : "t1",
"price" : -2.1
}
{
"_id" : ObjectId("6110e53847e9154a103dc258"),
"name" : "t2",
"price" : 0.0
}
{
"_id" : ObjectId("6110e53d47e9154a103dc259"),
"name" : "t3",
"price" : 3.2
}
{
"_id" : ObjectId("6110e54247e9154a103dc25a"),
"name" : "t4",
"price" : -2.6
}
{
"_id" : ObjectId("6110e54747e9154a103dc25b"),
"name" : "t5",
"price" : 1.0
}
// Newly added document
{
"name" : "t6",
"price" : 3.6,
"_id" : ObjectId("6114be69605d7c02aa34eac8")
}
需求:返回price比0大和比0小的数据
代码
match = {"$match":{"_id":{"$ne":""}}}
project = {
# 和0进行比较
"$project": { "price":1,"name":1,"cmp_to_zero":{"$cmp":["$price",0]}}
}
ret= db["test"]["test"].aggregate([match,project])
# 结果---cmp_to_zero 为0表示和0相等,-1表示小于0, 1表示大于0
[{'_id': ObjectId('6110e53447e9154a103dc257'), 'name': 't1', 'price': -2.1, 'cmp_to_zero': -1},
{'_id': ObjectId('6110e53847e9154a103dc258'), 'name': 't2', 'price': 0.0, 'cmp_to_zero': 0},
{'_id': ObjectId('6110e53d47e9154a103dc259'), 'name': 't3', 'price': 3.2, 'cmp_to_zero': 1},
{'_id': ObjectId('6110e54247e9154a103dc25a'), 'name': 't4', 'price': -2.6, 'cmp_to_zero': -1},
{'_id': ObjectId('6110e54747e9154a103dc25b'), 'name': 't5', 'price': 1.0, 'cmp_to_zero': 1},
{'_id': ObjectId('6114be69605d7c02aa34eac8'), 'name': 't6', 'price': 3.6, 'cmp_to_zero': 1}]
3. $concat
作用:字符串拼接
数据格式如下
{
"_id" : ObjectId("6110e53447e9154a103dc257"),
"city" : "shantou",
"contry" : "china",
"province" : "guangdong",
"street" : "test_street"
}
拼接完整的地址
代码
match = {"$match":{"_id":{"$ne":""}}}
project = {
"$project": { "province":1,"contry":1,"city":1,"street":1,"detail":{"$concat":["$contry","/","$province","/","$city","/","$street"]}}
}
ret= db["test"]["test"].aggregate([match,project])
# 结果
[
{'_id': ObjectId('6110e53447e9154a103dc257'), 'city': 'shantou', 'contry': 'china', 'province': 'guangdong', 'street': 'test_street', 'detail': 'china/guangdong/shantou/test_street'}
]
4.$concatArrays
作用:将多个列表拼接成一个列表
数据格式如下
{
"_id" : ObjectId("6110e53447e9154a103dc257"),
"east_city" : [
"shanghai",
"xiamen"
],
"north_city" : [
"beijing",
"tianjin"
],
"south_city" : [
"nanjing",
"shenzhen"
],
"west_city" : [
"shengdu",
"wulumuqi"
],
"middle_city" : [
]
}
将东西南北城市列表合成一个列表返回
代码
match = {"$match":{"_id":{"$ne":""}}}
project = {
"$project": { "city_list":{"$concatArrays":["$east_city","$west_city","$north_city","$south_city","$middle_city"]}}
}
ret= db["test"]["test"].aggregate([match,project])
# 结果
[{'_id': ObjectId('6110e53447e9154a103dc257'), 'city_list': ['shanghai', 'xiamen', 'shengdu', 'wulumuqi', 'beijing', 'tianjin', 'nanjing', 'shenzhen']}]
print(list(ret))
5.$cond
作用:返回if或者else里的返回值---条件匹配
数据格式如下:
// Newly added document
{
"name" : "t1",
"price" : 0.0,
"_id" : ObjectId("6114d086605d7c02aa34ead9")
}
// Newly added document
{
"name" : "t2",
"price" : 10.0,
"_id" : ObjectId("6114d08f605d7c02aa34eada")
}
// Newly added document
{
"name" : "t3",
"price" : 30.0,
"_id" : ObjectId("6114d096605d7c02aa34eadb")
}
// Newly added document
{
"name" : "t4",
"price" : 50.0,
"_id" : ObjectId("6114d09f605d7c02aa34eadc")
}
// Newly added document
{
"name" : "t5",
"price" : 80.0,
"_id" : ObjectId("6114d0a4605d7c02aa34eadd")
}
// Newly added document
{
"name" : "t6",
"price" : 100.0,
"_id" : ObjectId("6114d0aa605d7c02aa34eade")
}
price 小于100返回1000,大于50返回2000
代码
match = {"$match":{"_id":{"$ne":""}}}
project = {
# 满足 price和100的比较,
"$project": {"_id":0,"price":1, "price_result":{"$cond":{"if":{"$gte":["$price",100]},"then":2000,"else":1000}}}
}
ret= db["test"]["test"].aggregate([match,project])
#
[{'price': 0.0, 'price_result': 1000},
{'price': 10.0, 'price_result': 1000},
{'price': 30.0, 'price_result': 1000},
{'price': 50.0, 'price_result': 1000},
{'price': 80.0, 'price_result': 1000},
{'price': 100.0, 'price_result': 2000}]
print(list(ret))
6. $convert
作用:字段类型转换
准备数据时候,发现mongo不能直接插入decimal类型的数据,只能插入decimal128类型,插入数据代码
from decimal import Decimal
from bson.decimal128 import Decimal128
data = {
"name":"zhang",
"age":10,
"weight":75.5,
"birthday": datetime.datetime.now(),
"income": Decimal128(Decimal("8500.23")),
"has_car":False
}
ret = db["test"]["test"].insert_one(data)
print(ret.inserted_id)
数据格式如下:
{
"_id" : ObjectId("6114e20b661341258c35c08c"),
"name" : "zhang",
"age" : NumberInt(10),
"weight" : 75.5,
"birthday" : ISODate("2021-08-12T16:55:39.754+0000"),
"income" : NumberDecimal("8500.23"),
"has_car" : false
}
语法如下
{
$convert:
{
input: <expression>, 输入的值
to: <type expression>, 输出的值
onError: <expression>, // 出错操作
onNull: <expression> // 空值操作
}
}
代码
match = {"$match":{"_id":{"$ne":""}}}
project = {
"$project": {
"name":{"$convert":{"input":"$name","to":"bool","onError":"error","onError":None}},
"age":{"$convert":{"input":"$age","to":"string","onError":"error","onError":None}},
"weight":{"$convert":{"input":"$weight","to":"int","onError":"error","onError":None}},
"birthday":{"$convert":{"input":"$birthday","to":"bool","onError":"error","onError":None}},
"income":{"$convert":{"input":"$income","to":"decimal","onError":"error","onError":None}},
"has_car":{"$convert":{"input":"$has_car","to":"bool","onError":"error","onError":None}},
# $toString 可替代
"_id":{"$convert":{"input":"$_id","to":"string","onError":"error","onError":None}},
}
}
ret= db["test"]["test"].aggregate([match,project])
# 结果
[
{
'name': True,
'age': '10',
'weight': 75,
'birthday': True,
'income': Decimal128('8500.23'),
'has_car': False,
'_id': '6114e20b661341258c35c08c'
}
]
print(list(ret))
具体见:https://docs.mongodb.com/v4.4/reference/operator/aggregation/convert/
=== index d===
1. $dateFromParts
作用:根据日期的组成属性构造并返回Date对象。
2. $dateToString
作用:将时间类型的数据转成指定格式的字符串
数据格式如下
{
"_id" : ObjectId("6114e20b661341258c35c08c"),
"name" : "zhang",
"age" : 10,
"weight" : 75.5,
"birthday" : ISODate("2021-08-12T16:55:39.754Z"),
"income" : NumberDecimal("8500.23"),
"has_car" : false
}
birthday转换成不同格式的字符串
match_dict = {"$match": {"name": {"$ne": None}}}
project = {
"$project": {
"birthday": 1,
# 年月日时分秒
"format_1": {"$dateToString": {"format": "%Y-%m-%d %H:%M:%S", "date": "$birthday"}},
# 这一年的第几天
"format_2": {"$dateToString": {"format": "%j", "date": "$birthday"}},
# 这一天是周几,1-7
"format_3": {"$dateToString": {"format": "%u", "date": "$birthday"}},
# 这一年的第几个周
"format_4": {"$dateToString": {"format": "%U", "date": "$birthday"}},
}
}
ret = db["test"].aggregate([match_dict, project])
# 结果
[
{
'_id': ObjectId('6114e20b661341258c35c08c'), 'birthday': datetime.datetime(2021, 8, 12, 16, 55, 39, 754000),
'format_1': '2021-08-12 16:55:39',
'format_2': '224',
'format_3': '4',
'format_4': '32'
}
]
print(list(ret))
3. $dateFromString
作用:将string类型时间字符串转为datetime类型
{
"_id" : ObjectId("6163a977badb568bcc41e364"),
"date" : "2021-10-11",
"id" : NumberInt(1)
}
{
"_id" : ObjectId("6163a9898676e5552a04430e"),
"date" : "2021-10-11 11:37:03",
"id" : NumberInt(2)
}
代码
match_dict = {"$match":{"id":{"$ne":""}}}
project_dict = {"$project":{"new_data":{"$dateFromString":{"dateString":"$date"}}}}
ret = list(db["lakers"].aggregate(pipeline=[match_dict,project_dict]))
# [{'_id': ObjectId('6163a977badb568bcc41e364'), 'new_data': datetime.datetime(2021, 10, 11, 0, 0)}, {'_id': ObjectId('6163a9898676e5552a04430e'), 'new_data': datetime.datetime(2021, 10, 11, 11, 37, 3)}]
print(ret)
# 尝试一:想让id=1的date变为%Y-%m-%d %H:%M:%S,不可行。
project_dict = {"$project":{"new_data":{"$dateFromString":{"dateString":"$date","format": "%Y-%m-%d %H:%M:%S"}}}}
# 尝试二:想让id=d的date变为%Y-%m-%d不可行。
project_dict = {"$project":{"new_data":{"$dateFromString":{"dateString":"$date","format": "%Y-%m-%d %H:%M:%S"}}}}
# 尝试三:添加时间区
project_dict = {"$project":{"new_data":{"$dateFromString":{"dateString":"$date","timezone": "Asia/Chongqing"}}}}
# [{'_id': ObjectId('6163a977badb568bcc41e364'), 'new_data': datetime.datetime(2021, 10, 10, 16, 0)}, {'_id': ObjectId('6163a9898676e5552a04430e'), 'new_data': datetime.datetime(2021, 10, 11, 3, 37, 3)}]
print(ret)
4. $dateToParts
作用:返回时间的日月年周等信息
{
"_id" : ObjectId("6163b00af3c5ab01700d1acc"),
"id" : NumberInt(1),
"date" : ISODate("2021-10-10T16:00:00.000+0000")
}
{
"_id" : ObjectId("6163b00af3c5ab01700d1acd"),
"id" : NumberInt(2),
"date" : ISODate("2021-10-11T03:37:03.000+0000")
}
代码
match_dict = {"$match":{"id":{"$ne":""}}}
project_dict = {"$project":{"new_data":{"$dateToParts":{"date":"$date","timezone": "Asia/Chongqing","iso8601":True}}}}
ret = list(db["lakers"].aggregate(pipeline=[match_dict,project_dict]))
# 返回年份,第几周,周几等数据
# [{'_id': ObjectId('6163b00af3c5ab01700d1acc'), 'new_data': {'isoWeekYear': 2021, 'isoWeek': 41, 'isoDayOfWeek': 1, 'hour': 0, 'minute': 0, 'second': 0, 'millisecond': 0}}, {'_id': ObjectId('6163b00af3c5ab01700d1acd'), 'new_data': {'isoWeekYear': 2021, 'isoWeek': 41, 'isoDayOfWeek': 1, 'hour': 11, 'minute': 37, 'second': 3, 'millisecond': 0}}]
print(ret)
5. $divide
作用:指定字段做除法,返回结果
{
"_id" : ObjectId("6163b1b07bc2ea5ba3269122"),
"id" : 1.0,
"v1" : 10.0,
"v2" : 2.0
}
{
"_id" : ObjectId("6163b1b47bc2ea5ba3269123"),
"id" : 2.0,
"v1" : 5.0,
"v2" : 0.0
}
代码
# 一个字段和指定值相除
match_dict = {"$match":{"id":{"$ne":""}}}
project_dict = {"$project":{"id":1,"new_v1":{"$divide":["$v1",10]}}}
ret = list(db["lakers"].aggregate(pipeline=[match_dict,project_dict]))
# [{'_id': ObjectId('6163b1b07bc2ea5ba3269122'), 'id': 1.0, 'new_v1': 1.0}, {'_id': ObjectId('6163b1b47bc2ea5ba3269123'), 'id': 2.0, 'new_v1': 0.5}]
print(ret)
# 两个字段相除,分母不能为0
match_dict = {"$match":{"id":{"$ne":""}}}
project_dict = {"$project":{"id":1,"v3":{"$divide":["$v2","$v1"]}}}
ret = list(db["lakers"].aggregate(pipeline=[match_dict,project_dict]))
# [{'_id': ObjectId('6163b1b07bc2ea5ba3269122'), 'id': 1.0, 'v3': 0.2}, {'_id': ObjectId('6163b1b47bc2ea5ba3269123'), 'id': 2.0, 'v3': 0.0}]
print(ret)
== index e ==
1. $exp
作用:计算e的n次幂
https://docs.mongodb.com/v4.4/reference/operator/aggregation/exp/#mongodb-expression-exp.-exp
2. $eq
作用:判断两个值是否相等
数据格式如下
{
"_id" : ObjectId("6114e20b661341258c35c08c"),
"name" : "zhang",
"age" : 10,
"weight" : 75.5,
"birthday" : ISODate("2021-08-12T16:55:39.754Z"),
"income" : NumberDecimal("8500.23"),
"has_car" : false
}
代码
match_dict = {"$match": {"name": {"$ne": None}}}
project = {
"$project": {
"age": 1, "age_10_flag": {"$eq": ["$age", 10]}
}
}
ret = db["test"].aggregate([match_dict, project])
# 结果
[
{'_id': ObjectId('6114e20b661341258c35c08c'), 'age': 10, 'age_10_flag': True}
]
print(list(ret))
3. $elemMatch
作用:匹配数组里面object元素
数据
{
"contry" : "usa",
"list" : [
{
"age" : "10"
},
{
"age" : "12"
},
{
"age" : "0"
}
],
"_id" : ObjectId("613b40f9483c9940d852c6c3")
}
用法:
# 匹配list中的object对象,age为10
ret = db["lakers"].find_one({"list":{"$elemMatch":{"age":"10"}}})
# {'_id': ObjectId('613b40f9483c9940d852c6c3'), 'contry': 'usa', 'list': [{'age': '10'}, {'age': '12'}, {'age': '0'}]}
print(ret)
4.$explain
作用:分析查询语句执行的信息
ret = db["lakers"].find({"list":{"$elemMatch":{"age":"10"}}}).explain()
# 查看信息
{
'explainVersion': '1',
'queryPlanner': {'namespace': 'kobe.lakers', 'indexFilterSet': False, 'parsedQuery': {'list': {'$elemMatch': {'age': {'$eq': '10'}}}},
'maxIndexedOrSolutionsReached': False,
'maxIndexedAndSolutionsReached': False,
'maxScansToExplodeReached': False,
'winningPlan': {'stage': 'COLLSCAN', 'filter': {'list': {'$elemMatch': {'age': {'$eq': '10'}}}}, 'direction': 'forward'}, 'rejectedPlans': []},
'executionStats': {'executionSuccess': True, 'nReturned': 1, 'executionTimeMillis': 0, 'totalKeysExamined': 0, 'totalDocsExamined': 2, 'executionStages': {'stage': 'COLLSCAN', 'filter': {'list': {'$elemMatch': {'age': {'$eq': '10'}}}}, 'nReturned': 1, 'executionTimeMillisEstimate': 0, 'works': 4, 'advanced': 1, 'needTime': 2, 'needYield': 0, 'saveState': 0, 'restoreState': 0, 'isEOF': 1, 'direction': 'forward', 'docsExamined': 2}, 'allPlansExecution': []}, 'command': {'find': 'lakers', 'filter': {'list': {'$elemMatch': {'age': '10'}}}, '$db': 'kobe'},
'serverInfo':
{'host': '5b61d1253', 'port': 27017, 'version': '5.0.2', 'gitVersion':'6d9ec525e78465dcecadcff99cce953d380fedc8'},
'serverParameters': {'internalQueryFacetBufferSizeBytes': 104857600,
'internalQueryFacetMaxOutputDocSizeBytes': 104857600, 'internalLookupStageIntermediateDocumentMaxSizeBytes': 104857600, 'internalDocumentSourceGroupMaxMemoryBytes': 104857600,
'internalQueryMaxBlockingSortMemoryUsageBytes': 104857600,
'internalQueryProhibitBlockingMergeOnMongoS': 0,
'internalQueryMaxAddToSetBytes': 104857600,
'internalDocumentSourceSetWindowFieldsMaxMemoryBytes': 104857600},
'ok': 1.0}
测试
lakers中插入10000条数据,每条数据类型如下
{
"_id" : ObjectId("6163e1afc1076c551732ef65"),
"name" : "Mrs. Jamie Quinn DVM",
"index" : NumberInt(0),
"addr" : "Unit 5690 Box 9515\nDPO AA 11735"
}
# 不加索引前,查询index=9999的数据,打印执行信息
'explainVersion': '1',
'queryPlanner': {
'namespace': 'kobe.lakers', # 查询集合
'indexFilterSet': False, # 是否使用索引
'parsedQuery': {
'index': {
'$eq': 9999
}
}, # 查询条件
'maxIndexedOrSolutionsReached': False,
'maxIndexedAndSolutionsReached': False,
'maxScansToExplodeReached': False,
'winningPlan': {
'stage': 'COLLSCAN', # 查询方式,全表扫描
'filter': {
'index': {
'$eq': 9999
}
}, # 过滤条件
'direction': 'forward' # 查询顺序
}, # 最佳执行计划
'rejectedPlans': [] # 拒绝的执行计划
},
'executionStats': {
'executionSuccess': True, # 是否执行成功
'nReturned': 1, #
'executionTimeMillis': 5,
'totalKeysExamined': 0, # 索引扫描总次数
'totalDocsExamined': 10000, # 文档扫描总次数。
'executionStages': {
'stage': 'COLLSCAN', # 全表扫描
'filter': {
'index': {
'$eq': 9999
}
},
'nReturned': 1,
'executionTimeMillisEstimate': 0,
'works': 10002,
'advanced': 1,
'needTime': 10000,
'needYield': 0,
'saveState': 10,
'restoreState': 10,
'isEOF': 1, # 表示从文档的开始,扫描到文档结尾, 0表示不是
'direction': 'forward',
'docsExamined': 10000 # 文档检查数
},
'allPlansExecution': []
},
'command': {
'find': 'lakers',
'filter': {
'index': 9999
},
'$db': 'kobe'
},
'serverInfo': {
'host': '5b61d1275f53',
'port': 27017,
'version': '5.0.2',
'gitVersion': '6d9ec525e78465dcecadcff99cce953d380fedc8'
},
'serverParameters': {
'internalQueryFacetBufferSizeBytes': 104857600,
'internalQueryFacetMaxOutputDocSizeBytes': 104857600,
'internalLookupStageIntermediateDocumentMaxSizeBytes': 104857600,
'internalDocumentSourceGroupMaxMemoryBytes': 104857600,
'internalQueryMaxBlockingSortMemoryUsageBytes': 104857600,
'internalQueryProhibitBlockingMergeOnMongoS': 0,
'internalQueryMaxAddToSetBytes': 104857600,
'internalDocumentSourceSetWindowFieldsMaxMemoryBytes': 104857600
},
'ok': 1.0
}
# ======================= 给index字段加上索引 查询index=9999的数据,打印执行信息===============================
ret = db["lakers"].create_index([("index",1)])
# index_1
print(ret)
{
'explainVersion': '1',
'queryPlanner': {
'namespace': 'kobe.lakers',
'indexFilterSet': False,
'parsedQuery': {
'index': {
'$eq': 9999
}
},
'maxIndexedOrSolutionsReached': False,
'maxIndexedAndSolutionsReached': False,
'maxScansToExplodeReached': False,
'winningPlan': {
'stage': 'FETCH',
'inputStage': {
'stage': 'IXSCAN', # 全表扫描变为索引查询
'keyPattern': {
'index': 1
},
'indexName': 'index_1',
'isMultiKey': False,
'multiKeyPaths': {
'index': []
},
'isUnique': False,
'isSparse': False,
'isPartial': False,
'indexVersion': 2,
'direction': 'forward',
'indexBounds': {
'index': ['[9999, 9999]']
}
}
},
'rejectedPlans': []
},
'executionStats': {
'executionSuccess': True,
'nReturned': 1,
'executionTimeMillis': 4,
'totalKeysExamined': 1, # 索引扫描总次数
'totalDocsExamined': 1, # 文档扫描总次数从9999变为1
'executionStages': {
'stage': 'FETCH',
'nReturned': 1,
'executionTimeMillisEstimate': 2,
'works': 2,
'advanced': 1,
'needTime': 0,
'needYield': 0,
'saveState': 0,
'restoreState': 0,
'isEOF': 1,
'docsExamined': 1,
'alreadyHasObj': 0,
'inputStage': {
'stage': 'IXSCAN',
'nReturned': 1,
'executionTimeMillisEstimate': 2,
'works': 2,
'advanced': 1,
'needTime': 0,
'needYield': 0,
'saveState': 0,
'restoreState': 0,
'isEOF': 1,
'keyPattern': {
'index': 1
},
'indexName': 'index_1',
'isMultiKey': False,
'multiKeyPaths': {
'index': []
},
'isUnique': False,
'isSparse': False,
'isPartial': False,
'indexVersion': 2,
'direction': 'forward',
'indexBounds': {
'index': ['[9999, 9999]']
},
'keysExamined': 1,
'seeks': 1,
'dupsTested': 0,
'dupsDropped': 0
}
},
'allPlansExecution': []
},
'command': {
'find': 'lakers',
'filter': {
'index': 9999
},
'$db': 'kobe'
},
'serverInfo': {
'host': '5b61d1275f53',
'port': 27017,
'version': '5.0.2',
'gitVersion': '6d9ec525e78465dcecadcff99cce953d380fedc8'
},
'serverParameters': {
'internalQueryFacetBufferSizeBytes': 104857600,
'internalQueryFacetMaxOutputDocSizeBytes': 104857600,
'internalLookupStageIntermediateDocumentMaxSizeBytes': 104857600,
'internalDocumentSourceGroupMaxMemoryBytes': 104857600,
'internalQueryMaxBlockingSortMemoryUsageBytes': 104857600,
'internalQueryProhibitBlockingMergeOnMongoS': 0,
'internalQueryMaxAddToSetBytes': 104857600,
'internalDocumentSourceSetWindowFieldsMaxMemoryBytes': 104857600
},
'ok': 1.0
}
6. $each
作用配合addToSet往列表中添加多个元素
{
"id" : 3.0,
"vlist" : [
1.0,
2.0,
3.0
],
"ilist" : [
{
"value" : 1.0
},
{
"value" : 2.0
}
],
"_id" : ObjectId("6163b7377bc2ea5ba326912f")
}
代码
# vlist中添加一个元素
query_dict = {"id":3}
update_dict = {"$addToSet":{"vlist":4.0}}
ret = db["lakers"].update_one(filter=query_dict,update=update_dict)
# vlist中添加多个元素, $each要搭配$addToSet使用
update_dict = {"$addToSet":{"vlist":{"$each":[5.0,6.0]}}}
7. $exist
作用:匹配字段是否存在
{
"_id" : ObjectId("6163d0647bc2ea5ba3269157"),
"a" : 1.0,
"b" : 2.0
}
{
"_id" : ObjectId("6163d06e7bc2ea5ba3269158"),
"a" : 1.0,
"b" : 2.0,
"c" : 3.0
}
{
"_id" : ObjectId("6163d07c7bc2ea5ba3269159"),
"b" : 2.0,
"c" : 3.0
}
代码
# 查找包含a的数据
query_dict = {"a":{"$exists":True}}
# [{'_id': ObjectId('6163d0647bc2ea5ba3269157'), 'a': 1.0, 'b': 2.0}, {'_id': ObjectId('6163d06e7bc2ea5ba3269158'), 'a': 1.0, 'b': 2.0, 'c': 3.0}]
ret =list(db["lakers"].find(query_dict))
print(ret)
# 查找不包含a的数据
query_dict = {"a":{"$exists":False}}
# [{'_id': ObjectId('6163d07c7bc2ea5ba3269159'), 'b': 2.0, 'c': 3.0}]
ret =list(db["lakers"].find(query_dict))
8. $expr
作用:查询语句中使用聚合表达式
{
"_id" : ObjectId("6163d0647bc2ea5ba3269157"),
"a" : 1.0,
"b" : 2.0,
"c" : 1.0
}
{
"_id" : ObjectId("6163d06e7bc2ea5ba3269158"),
"a" : 1.0,
"b" : 2.0,
"c" : 3.0
}
{
"_id" : ObjectId("6163d07c7bc2ea5ba3269159"),
"b" : 2.0,
"c" : 3.0,
"a" : 1.0
}
代码
# 查询字段b的值大于字段c的值得数据
query_dict = {"$expr":{"$gt":["$b","$c"]}}
# [{'_id': ObjectId('6163d0647bc2ea5ba3269157'), 'a': 1.0, 'b': 2.0, 'c': 1.0}]
ret =list(db["lakers"].find(query_dict))
print(ret)
# 查询字段b的值大于字段c的值,且字段a的值不为1.0
query_dict = {"$and":[
{"$expr":{"$gt":["$b","$c"]}},{"a":{"$ne":1.0}}]
}
# []
ret =list(db["lakers"].find(query_dict))
print(ret)
== index f ==
1. $facet
作用:一次性执行多条aggragate语句:提升查询效率
facet_dict = {"$facet":{
"plus":[{"$match":{"a":{"$ne":""}}},{"$project":{"sum1":{"$sum":["$a","$b"]}}}],
"newList":[{"$match":{"a":{"$ne":""}}},{"$group":{"_id":"a","list":{"$addToSet":"$c"}}}],
}}
ret = list(db["lakers"].aggregate(pipeline=[facet_dict]))
[{'plus': [{'_id': ObjectId('6163d0647bc2ea5ba3269157'), 'sum1': 3.0}, {'_id': ObjectId('6163d06e7bc2ea5ba3269158'), 'sum1': 3.0}, {'_id': ObjectId('6163d07c7bc2ea5ba3269159'), 'sum1': 3.0}], 'newList': [{'_id': 'a', 'list': [3.0, 1.0]}]}]
2. $first
作用: 从分组结果中取第一条---必须和分组搭配使用
db = MongoClient(["127.0.0.1:37017","127.0.0.1:47017","127.0.0.1:57017"],replicaset="rs",connect=True)
group_dict = {"$group":{"_id":"$item","earlist_time":{"$first":"$date"}}}
ret = db["test"]["test"].aggregate([group_dict])
[
{'_id': 'jkl', 'earlist_time': datetime.datetime(2014, 2, 3, 9, 0)},
{'_id': 'xyz', 'earlist_time': datetime.datetime(2014, 2, 3, 9, 5)},
{'_id': 'abc', 'earlist_time': datetime.datetime(2014, 1, 1, 8, 0)}
]
3. $filter
作用:关联查询后的结果进行过滤(match不生效)
表1数据company
{
"name" : "huawei",
"_id" : ObjectId("6163d7c27bc2ea5ba3269189")
}
表2数据 goods
{
"_id" : ObjectId("6163d7eb7bc2ea5ba326918e"),
"c_id" : "6163d7c27bc2ea5ba3269189",
"name" : "p50",
"price" : 5000.0
}
{
"_id" : ObjectId("6163d7f27bc2ea5ba326918f"),
"c_id" : "6163d7c27bc2ea5ba3269189",
"name" : "p40",
"price" : 4000.0
}
{
"_id" : ObjectId("6163d7f77bc2ea5ba3269190"),
"c_id" : "6163d7c27bc2ea5ba3269189",
"name" : "p30",
"price" : 3000.0
}
{
"_id" : ObjectId("6163d7fc7bc2ea5ba3269191"),
"c_id" : "6163d7c27bc2ea5ba3269189",
"name" : "p20",
"price" : 2000.0
}
代码
# 查询huawei下所有产品价格大于3000的商品,聚合后match不生效
match_dict = {"$match":{"name":"huawei"}}
project_dict = {"$project":{"strid":{"$toString":"$_id"},"name":1}}
lookup_dict = {"$lookup":{"from":"goods","localField":"strid","foreignField":"c_id","as":"infos"}}
match_dict2 = {"$match":{"infos.price":{"$gt":3000}}}
# [{'_id': ObjectId('6163d7c27bc2ea5ba3269189'), 'name': 'huawei', 'strid': '6163d7c27bc2ea5ba3269189', 'infos': [{'_id': ObjectId('6163d7eb7bc2ea5ba326918e'), 'c_id': '6163d7c27bc2ea5ba3269189', 'name': 'p50', 'price': 5000.0}, {'_id': ObjectId('6163d7f27bc2ea5ba326918f'), 'c_id': '6163d7c27bc2ea5ba3269189', 'name': 'p40', 'price': 4000.0}, {'_id': ObjectId('6163d7f77bc2ea5ba3269190'), 'c_id': '6163d7c27bc2ea5ba3269189', 'name': 'p30', 'price': 3000.0}, {'_id': ObjectId('6163d7fc7bc2ea5ba3269191'), 'c_id': '6163d7c27bc2ea5ba3269189', 'name': 'p20', 'price': 2000.0}]}]
ret = list(db["company"].aggregate(pipeline=[match_dict, project_dict,lookup_dict,match_dict2]))
print(ret)
# $filter生效
match_dict = {"$match":{"name":"huawei"}}
project_dict = {"$project":{"strid":{"$toString":"$_id"},"name":1}}
lookup_dict = {"$lookup":{"from":"goods","localField":"strid","foreignField":"c_id","as":"infos"}}
project_dict2 = {"$project":{"infos":{"$filter":{"input":"$infos","as":"item","cond":{"$gt":["$$item.price",3000]}}}}}
# [{'_id': ObjectId('6163d7c27bc2ea5ba3269189'), 'infos': [{'_id': ObjectId('6163d7eb7bc2ea5ba326918e'), 'c_id': '6163d7c27bc2ea5ba3269189', 'name': 'p50', 'price': 5000.0}, {'_id': ObjectId('6163d7f27bc2ea5ba326918f'), 'c_id': '6163d7c27bc2ea5ba3269189', 'name': 'p40', 'price': 4000.0}]}]
ret = list(db["company"].aggregate(pipeline=[match_dict, project_dict,lookup_dict,project_dict2]))
print(ret)
4. $floor
作用:向下取整
{
"_id" : ObjectId("6163d0647bc2ea5ba3269157"),
"a" : 1.1,
"b" : 2.2,
"c" : 1.1
}
{
"_id" : ObjectId("6163d06e7bc2ea5ba3269158"),
"a" : 1.5,
"b" : 2.6,
"c" : 3.7
}
{
"_id" : ObjectId("6163d07c7bc2ea5ba3269159"),
"b" : 2.0,
"c" : 3.1,
"a" : 1.9
}
代码
match_dict = {"$match":{"a":{"$ne":""}}}
project_dict = {"$project":{"new_a":{"$floor":"$a"}}}
# [{'_id': ObjectId('6163d0647bc2ea5ba3269157'), 'new_a': 1.0}, {'_id': ObjectId('6163d06e7bc2ea5ba3269158'), 'new_a': 1.0}, {'_id': ObjectId('6163d07c7bc2ea5ba3269159'), 'new_a': 1.0}]
ret = list(db["lakers"].aggregate(pipeline=[match_dict, project_dict]))
print(ret)
== index g ==
1. graphLookup
作用:类似于自关联查询
数据
// Newly added document
{
"name" : "china",
"_id" : ObjectId("61275f3f1d2b613cc23063ae")
}
// Newly added document
{
"name" : "shanxi",
"up" : "china",
"_id" : ObjectId("61275f511d2b613cc23063af")
}
// Newly added document
{
"name" : "xian",
"up" : "shanxi",
"_id" : ObjectId("61275f721d2b613cc23063b0")
}
// Newly added document
{
"name" : "gaoxin",
"up" : "xian",
"_id" : ObjectId("61275f7c1d2b613cc23063b1")
}
== index H
1. $hint
作用: 多个索引存在的情况下,指定用某个索引查询
lakers中插入10000条数据,每条数据类型如下
{
"_id" : ObjectId("6163e1afc1076c551732ef65"),
"name" : "Mrs. Jamie Quinn DVM",
"index" : NumberInt(0),
"addr" : "Unit 5690 Box 9515\nDPO AA 11735"
}, 有两个索引,index和name
代码
指定name作为索引查询
ret = list(db["lakers"].find({"index":9999}).hint([("name",1)]))
[{'_id': ObjectId('6163e1afc1076c5517331674'), 'name': 'Edward Allison', 'index': 9999, 'addr': '256 Paula Lakes\nWallaceland, WA 38915'}]
== index I ==
1. $ifNull
作用:用于判断第一个表达式是否为 NULL,如果为 NULL 则返回第二个参数的值,如果不为 NULL 则返回第一个参数的值。
{
"_id" : ObjectId("6163ee4a8b72fe2513f74194"),
"item" : "x",
"description" : "product1",
"qty" : 300.0
}
{
"_id" : ObjectId("6163ee558b72fe2513f74195"),
"item" : "y",
"description" : null,
"qty" : 400.0
}
{
"_id" : ObjectId("6163ee5e8b72fe2513f74196"),
"item" : "z",
"qty" : 500.0
}
代码
match_dict = {"$match":{"item":{"$ne":""}}}
project_dict = {"$project":{"item":1,"description":{"$ifNull":["$description","noField"]}}}
ret = list(db["lakers"].aggregate(pipeline=[match_dict, project_dict]))
# [{'_id': ObjectId('6163ee4a8b72fe2513f74194'), 'item': 'x', 'description': 'product1'}, {'_id': ObjectId('6163ee558b72fe2513f74195'), 'item': 'y', 'description': 'noField'}, {'_id': ObjectId('6163ee5e8b72fe2513f74196'), 'item': 'z', 'description': 'noField'}]
print(ret)
2. $in
作用:相当于$or的用法
{
"_id" : ObjectId("6163ee4a8b72fe2513f74194"),
"item" : "x",
"description" : "product1",
"qty" : 300.0
}
{
"_id" : ObjectId("6163ee558b72fe2513f74195"),
"item" : "y",
"description" : null,
"qty" : 400.0
}
{
"_id" : ObjectId("6163ee5e8b72fe2513f74196"),
"item" : "z",
"qty" : 500.0
}
代码
ret = list(db["lakers"].find({"item":{"$in":["x","y"]}}))
# [{'_id': ObjectId('6163ee4a8b72fe2513f74194'), 'item': 'x', 'description': 'product1', 'qty': 300.0}, {'_id': ObjectId('6163ee558b72fe2513f74195'), 'item': 'y', 'description': None, 'qty': 400.0}]
print(ret)
2. $indexOfArray
作用:返回指定元素在数组中的索引值,字符串形式返回,不存在返回-1
{
"_id" : ObjectId("6163f2058b72fe2513f741a6"),
"items" : [
"one",
"two",
"three"
]
}
{
"_id" : ObjectId("6163f20c8b72fe2513f741a7"),
"items" : [
1.0,
2.0,
3.0
]
}
{
"_id" : ObjectId("6163f2148b72fe2513f741a8"),
"items" : [
null,
null,
2.0
]
}
{
"_id" : ObjectId("6163f21c8b72fe2513f741a9"),
"items" : null
}
{
"_id" : ObjectId("6163f21e8b72fe2513f741aa"),
"items" : 3.0
}
代码
# items字段必须都是array类型才行, 因此删除item:null和item:3.0两条数据
db = client["kobe"]
# match_dict = {"$match":{"items":{"$ne":""}}}
project_dict = {"$project":{"items":1,"index":{"$indexOfArray":["$items",2]}}}
ret = list(db["lakers"].aggregate(pipeline=[project_dict]))
# [{'_id': ObjectId('6163f2058b72fe2513f741a6'), 'items': ['one', 'two', 'three'], 'index': -1}, {'_id': ObjectId('6163f20c8b72fe2513f741a7'), 'items': [1.0, 2.0, 3.0], 'index': 1}, {'_id': ObjectId('6163f2148b72fe2513f741a8'), 'items': [None, None, 2.0], 'index': 2}] # 存在返回正常的索引值,不存在返回-1
print(ret)
== index R ==
1. $replaceRoot
作用:替换文档,提升文档显示级别,pymongo只识别$replaceWith(replaceRoot的别名)
数据:
{
"name" : "zhang",
"age" : 10.0,
"hobby" : {
"school" : "study",
"home" : "game"
},
"_id" : ObjectId("61274f451d2b613cc230639e")
}
需要显示:
{"school" : "study", "home" : "game"}
代码
ret = db["test"]["test_aps"].aggregate([{"$match":{"_id":{"$ne":""}}},{"$replaceWith":{"new_dict":"$hobby"}}])
# 结果
[{'new_dict': {'school': 'study', 'home': 'game'}}]
配合match,concat使用
ret = db["test"]["test_aps"].aggregate([
{"$match":{"_id":{"$ne":""}}},
{"$replaceWith":{"new_dict":"$hobby"}},
{"$replaceWith":{"new_dict":{"concat_new_dict":{"$concat":["$new_dict.school","-100-100-","$new_dict.home"]}}}}
])
# 结果
[{'new_dict': {'concat_new_dict': 'study-100-100-game'}}]
2. $redact
作用:修订/校验,意思是对文档内容进行过滤,选择一些过滤或保留的信息,常常和$cond配合一起使用
数据:
{
"_id" : ObjectId("61650096797a5b40b2b3d54f"),
"id" : 1.0,
"to" : [
{
"i" : 2.0
},
{
"i" : 4.0,
"valid" : true
}
]
}
代码
# 想要to中只保留没有valid的{}
3. $range
作用:根据指定的数组生成一个等差数列
{
"_id" : ObjectId("616503d8797a5b40b2b3d569"),
"city" : "xian",
"distance" : 200.0
}
{
"_id" : ObjectId("616503df797a5b40b2b3d56a"),
"city" : "lanzhou",
"distance" : 800.0
}
{
"_id" : ObjectId("616503ea797a5b40b2b3d56b"),
"city" : "chengdu",
"distance" : 1350.0
}
代码
project_dict = {"$project": {"_id":0,"distance":1,"city":1,"newList":{"$range":[0,"$distance",100]}}}
# [{'city': 'xian', 'distance': 200.0, 'newList': [0, 100]}, {'city': 'lanzhou', 'distance': 800.0, 'newList': [0, 100, 200, 300, 400, 500, 600, 700]}, {'city': 'chengdu', 'distance': 1350.0, 'newList': [0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300]}]
ret = list(db["lakers"].aggregate(pipeline=[project_dict]))
print(ret)
4. $reduce
作用:

浙公网安备 33010602011771号